Data Cleaning

getwd()
## [1] "/Users/victoriafield/Library/Mobile Documents/com~apple~CloudDocs/Masters Thesis/Chapter Two CSLAP Manuscript/R - data and outputs"
CSLAP<-read.csv("./raw_data/CSLAP 2012-2017 EDI Original.csv", na.strings = c("", " ", "NA", "NaN"))
str(CSLAP)
## 'data.frame':    5025 obs. of  47 variables:
##  $ LakeID                       : chr  "1005GLE0441" "1005GLE0441" "1005GLE0441" "1005GLE0441" ...
##  $ Lake.Name                    : chr  "Glen Lake" "Glen Lake" "Glen Lake" "Glen Lake" ...
##  $ Proj..Code                   : num  8 8 8 8 8 8 8 8 8 8 ...
##  $ Sample.Name                  : chr  "16-08-02" "16-08-03" "16-08-04" "16-08-05" ...
##  $ Sample.Date                  : chr  "07/04/2016" "07/11/2016" "07/26/2016" "08/01/2016" ...
##  $ Data.Provider                : chr  "CSL" "CSL" "CSL" "CSL" ...
##  $ Info.Type                    : chr  "OW" "OW" "OW" "OW" ...
##  $ LocationID                   : chr  "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" "1005GLE0441_C" ...
##  $ Latitude                     : num  43.4 43.4 43.4 43.4 43.4 ...
##  $ Longitude                    : num  -73.7 -73.7 -73.7 -73.7 -73.7 ...
##  $ Sample.Depth..m.             : num  1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 1.5 ...
##  $ Secchi.Depth..m.             : num  3.95 4 3.95 4 5.95 4.05 4.9 3.55 2.6 3.3 ...
##  $ DO..mg.L.                    : logi  NA NA NA NA NA NA ...
##  $ Bottom.DO..mg.L.             : logi  NA NA NA NA NA NA ...
##  $ pH                           : num  7.59 7.55 8.37 7.37 7.37 7.51 7.99 7.77 7.76 7.45 ...
##  $ TP..mg.L.                    : num  0.0068 0.0081 0.0077 0.0061 0.0063 0.0067 0.0076 0.0086 NA NA ...
##  $ TDP..mg.L.                   : logi  NA NA NA NA NA NA ...
##  $ Conductance..umho.cm.        : num  424 363 451 406 400 ...
##  $ Temperature..air..deg.C.     : num  21 23 25 22 17 25 26 24 22 21 ...
##  $ Temperature..water..deg.C.   : num  24 26 26 26 19 27 26 23 25 22 ...
##  $ TN..mg.L.                    : num  0.321 0.197 0.202 0.214 0.215 0.515 0.29 0.229 0.291 0.189 ...
##  $ TDN..mg.L.                   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Ammonia..mg.L.               : num  NA 0.028 NA 0.076 0.102 NA 0.0075 NA 0.022 0 ...
##  $ NOx..mg.L.                   : num  NA 0.0035 NA 0.034 0.029 NA 0.0035 NA 0.00275 NA ...
##  $ Nitrate..mg.L.               : logi  NA NA NA NA NA NA ...
##  $ Nitrite..mg.L.               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ True.Color..ptu.             : num  15 23 15 18 11 16 22 14 6 3 ...
##  $ Chloride..mg.L.              : num  NA 51.7 NA NA NA NA 74.1 NA NA NA ...
##  $ Calcium..mg.L.               : num  NA NA NA NA NA NA NA NA 21.3 NA ...
##  $ Chl.a..probe...ug.L.         : logi  NA NA NA NA NA NA ...
##  $ Extracted.Chl.a..ug.L.       : num  0.8 1.7 1.5 0.6 1.1 1.7 1.5 3.8 13.3 3.2 ...
##  $ UFI.SBK.FP.Chl.a..ug.L.      : logi  NA NA NA NA NA NA ...
##  $ UFI.SBK.FP.BGA..ug.L.        : logi  NA NA NA NA NA NA ...
##  $ X.UFI.SBK.Microcystin..ug.L..: logi  NA NA NA NA NA NA ...
##  $ ESF.FP.Chl.a..ug.L.          : num  NA NA NA NA NA NA NA NA 2.45 2.21 ...
##  $ ESF.FP.BGA..ug.L.            : num  0 0 0.36 0.2 0 0.17 0.19 0.42 0 0 ...
##  $ ESF.Microcystin..ug.L.       : chr  "ND" "ND" "ND" "ND" ...
##  $ HAB.Status                   : chr  NA NA NA "No Bloom" ...
##  $ Alkalinity.as.CaCO3..mg.L.   : logi  NA NA NA NA NA NA ...
##  $ DOC..mg.L.                   : logi  NA NA NA NA NA NA ...
##  $ UV.254..1.cm.                : logi  NA NA NA NA NA NA ...
##  $ Iron..ug.L.                  : logi  NA NA NA NA NA NA ...
##  $ Manganese..ug.L.             : logi  NA NA NA NA NA NA ...
##  $ Arsenic..ug.L.               : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ QA                           : int  3 2 2 2 NA 2 2 2 3 2 ...
##  $ QB                           : int  3 3 3 3 NA 4 3 3 3 3 ...
##  $ QC                           : int  3 2 3 3 NA 2 2 3 3 2 ...

Based on output from str() and prior knowledge of the data, to clean this dataset I need to:

-Change column names and remove units

-Identify duplicate levels of Lake_Name, we know there is a duplicate because we have 73 levels of the unique factor LakeID and 72 levels of Lake_Name

-Split Sample_Date variable into two more columns: Sample_Year and Sample_Month

-For each lake, identify how many years the observations span (If less than 3, remove from dataset)

-Clean levels of Data_Provider to one level (“CSL”)

-Clean levels of Info_Type

-Remove character strings from ESF_Microcystin_ug.L

-Read in a merge values for Mean Depth, Catchment Area to Surface Area Ratio, Dreissenid Status By Year, and %Agricultural Cover

Change column names

colnames(CSLAP)<-c("LakeID", "Lake_Name", "Proj_Code", "Sample_Name", "Sample_Date", "Data_Provider", "Info_Type", "LocationID", "Latitude", "Longitude", "Sample_Depth", "Secchi_Depth", "DO", "Bottom_DO","pH", "TP", "TDP", "Conductance", "Temp_Air", "Temp_Water", "TN", "TDN" , "Ammonia", "NOx", "Nitrate", "Nitrite", "True_Color", "Chloride", "Calcium", "Chl.a_FP", "Extracted_Chl.a", "UFI_SBK_FP_Chl.a", "UFI_SBK_FP_BGA", "UFI_SBK_Microcystin", "ESF_FP_Chl.a","ESF_FP_BGA", "ESF_Microcystin", "HAB_Status", "Alkalinity_CaCO3", "DOC", "UV_254", "Iron", "Manganese", "Arsenic", "QA", "QB", "QC")

Change duplicate Lake_Name:“Forest Lake” and remove white space from Lake_Name: “Hadlock Pond”

We want the lake with LakeID:“1301FOR0441” to be called “Lake Forest” instead of “Forest Lake”. And we need to change “Hadlock Pond” to “Hadlock Pond”

CSLAP$Lake_Name<-as.character(CSLAP$Lake_Name) #first, make this column a character vector instead of factor
CSLAP[CSLAP$LakeID == "1301FOR0441", 2] <- rep("Lake Forest", 40) #then, replace the specified cells with "Lake Forest"
CSLAP[CSLAP$LakeID == "1005HAD0432", 2] <- rep("Hadlock Pond", 78) #then, replace the specified cells with "Hadlock Pond"
CSLAP$Lake_Name<-as.factor(CSLAP$Lake_Name) #last, change the column back to factor vector

Create new columns for Sample_Year and Sample_Month

#extract sample year and month 
CSLAP$Sample_Year<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
CSLAP$Sample_Month<-as.numeric(format(as.Date(CSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

For each lake, check how many years they were enrolled in CSLAP. Remove lakes with less than 3 years

table(CSLAP$LakeID, CSLAP$Sample_Year)
##               
##                2012 2013 2014 2015 2016 2017
##   0201CUB0115    16   16   16   16   16   16
##   0303LOR0009     8    8    0    8    8    8
##   0403RUS0146     0    0    0    0   16   16
##   0404LOO0079     4    8    8   16   16   16
##   0601CHE0213    17   17   16   16   17   16
##   0601GUI0188    16   16   16   13   17   16
##   0602BRA0154    13   12   16    0   19   16
##   0602CRO0073    12   13    4   13   17   14
##   0602EAR0146    22   26    3   20   18   16
##   0602EAT0163    13   12    0   16   16   16
##   0602ECH0081    12   17   17   17    0    0
##   0602GEN0088    12   14   16   18   18    0
##   0602HAT0155    15   17   17    0   16   16
##   0602PET0078    16   12   17   14   17   16
##   0602PLY0107     8    8    8    8   10    8
##   0602SON0072    18    2   23   21   20   16
##   0602TUL0068    10   12   16   17   18   16
##   0602ULI0067     9   12    0   18   17   16
##   0602WAR0094     8   13   16   16   17   16
##   0703CAZ0153    18   24   25    2   23   16
##   0703DER0139A   12   12   16   16   16   16
##   0703KAS0109     7    9    8    8    0    8
##   0703PAN0094     0    0    8    7    8   10
##   0703TUS0153A   12   14    0   16   14   16
##   0704CAN0286     0    0    0    0    1    0
##   0801BRA0689    16   16   16    0   16   16
##   0801EFF0426     0    8    8    0    0    0
##   0801OTT0926     0    8    0    8    8    8
##   0902EAG0083     9    8    0    8   11    9
##   0904SIL0351    10    8    8    9    8    8
##   0906BON0024    12   12   16   14   16   17
##   0906GRA0051     0   10   15   14    8   16
##   0906MIL0055    12   12   16   16   12   16
##   0906OTH0009     0    0    0   19   17   16
##   1004AUG0213    16   16    3   17   16   16
##   1004LIN0315     0   11   16   16   15    0
##   1004PLA0254    24   17   14   22   12   12
##   1005GLE0441    11   18   14   16   16   16
##   1005HAD0432    12   16   17   17    0   16
##   1005SUN0440    15   22   18   15    0   16
##   1102BAB1109    16   18   17   17   16   16
##   1102TAC1129     0    5    5    4    6    6
##   1104EAG0438    12   13   14   12   16   16
##   1104EFN0129    12   12   15   16   16    0
##   1104FOR0340     4    6    4    6    6    7
##   1104FRI0365    16   16   16    0    8    8
##   1104HUN0131    12   12    0   16   16   16
##   1104JEN0130    12   12   15    0   13   16
##   1104PAR0432     0   28    0    0    0    0
##   1104PLE0313     0    0   16   16   16    0
##   1104SAC0314    16   16    0    0    0   16
##   1104SCH0374    28   23   12   33   32   32
##   1201CAN0717    12   12   15   14   15   16
##   1201ECA0697    13   10   17   16   16   16
##   1201GAL0563    12   13   16   16   16   16
##   1201PEC0686     8    0    8   12   16   16
##   1201PLE0745     0   12   16   12    8    8
##   1301BOW0444     0    9    9    8   14    8
##   1301BUR0386C    0   13    9   15   16   16
##   1301FOR0441     0    8    8    7    9    8
##   1301IND0167     0    5    9    7   16   17
##   1301ROA0183B    9    8    0   16   10   10
##   1301SEP0826     0   16   13   15   16   10
##   1310QUE0057    13   12   17   19   14   16
##   1310ROU0080     0    6    7    7    6    8
##   1310SPR0081     0    0   16   16   16   16
##   1401ANA0251    12   14   16   14   16   14
##   1401DEV0174     0    0   16   16   17   12
##   1401SOM0268    24   24   17   16   17   16
##   1402YAN0021     0    8    8    6    0    8
##   1404OQU0383    16   16   16   16   14   16
##   1501TUX1007    16    0   16   17   12   14
##   1701LLO0708     8    8    8    7    8    8
CSLAP$LakeID<-as.character(CSLAP$LakeID) #first, change to a character vector

CSLAP<-CSLAP[CSLAP$LakeID != "0403RUS0146", ]
CSLAP<-CSLAP[CSLAP$LakeID != "0704CAN0286", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "0801EFF0426", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "1104PAR0432", ] 
CSLAP<-CSLAP[CSLAP$LakeID != "0602PLY0107", ] #Plymouth reservoir will also be removed because it is eutrophic

CSLAP$LakeID<-as.factor(CSLAP$LakeID)
CSLAP$LakeID<-droplevels(CSLAP$LakeID)
levels(CSLAP$LakeID)
##  [1] "0201CUB0115"  "0303LOR0009"  "0404LOO0079"  "0601CHE0213"  "0601GUI0188" 
##  [6] "0602BRA0154"  "0602CRO0073"  "0602EAR0146"  "0602EAT0163"  "0602ECH0081" 
## [11] "0602GEN0088"  "0602HAT0155"  "0602PET0078"  "0602SON0072"  "0602TUL0068" 
## [16] "0602ULI0067"  "0602WAR0094"  "0703CAZ0153"  "0703DER0139A" "0703KAS0109" 
## [21] "0703PAN0094"  "0703TUS0153A" "0801BRA0689"  "0801OTT0926"  "0902EAG0083" 
## [26] "0904SIL0351"  "0906BON0024"  "0906GRA0051"  "0906MIL0055"  "0906OTH0009" 
## [31] "1004AUG0213"  "1004LIN0315"  "1004PLA0254"  "1005GLE0441"  "1005HAD0432" 
## [36] "1005SUN0440"  "1102BAB1109"  "1102TAC1129"  "1104EAG0438"  "1104EFN0129" 
## [41] "1104FOR0340"  "1104FRI0365"  "1104HUN0131"  "1104JEN0130"  "1104PLE0313" 
## [46] "1104SAC0314"  "1104SCH0374"  "1201CAN0717"  "1201ECA0697"  "1201GAL0563" 
## [51] "1201PEC0686"  "1201PLE0745"  "1301BOW0444"  "1301BUR0386C" "1301FOR0441" 
## [56] "1301IND0167"  "1301ROA0183B" "1301SEP0826"  "1310QUE0057"  "1310ROU0080" 
## [61] "1310SPR0081"  "1401ANA0251"  "1401DEV0174"  "1401SOM0268"  "1402YAN0021" 
## [66] "1404OQU0383"  "1501TUX1007"  "1701LLO0708"

Change levels of Data_Provider

levels(CSLAP$Data_Provider)
## NULL
CSLAP$Data_Provider<-as.character(CSLAP$Data_Provider)
CSLAP[CSLAP$Data_Provider == "csl", 6] <- "CSL"
CSLAP$Data_Provider<-as.factor(CSLAP$Data_Provider)
CSLAP$Data_Provider<-droplevels(CSLAP$Data_Provider)
levels(CSLAP$Data_Provider)
## [1] "CSL"

Change levels of Info_Type

levels(CSLAP$Info_Type)
## NULL
CSLAP$Info_Type<-as.character(CSLAP$Info_Type)
CSLAP[CSLAP$Info_Type == "ow", 7] <- "OW"
CSLAP[CSLAP$Info_Type == "sb", 7] <- "SB"
CSLAP$Info_Type<-as.factor(CSLAP$Info_Type)
CSLAP$Info_Type<-droplevels(CSLAP$Info_Type)
levels(CSLAP$Info_Type)
## [1] "BS" "OW" "RT" "SB"

Clean ESF_Microcystin vector

# ESF_Microcystin should be a numeric column,but it's listed as factor. We need to remove character values from this column 

CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "ND"] <- NA
CSLAP$ESF_Microcystin[CSLAP$ESF_Microcystin == "no filter"] <- NA

CSLAP$ESF_Microcystin <- as.numeric(as.character(CSLAP$ESF_Microcystin))

str(CSLAP$ESF_Microcystin)
##  num [1:4898] NA NA NA NA NA NA NA NA NA NA ...

Add TN:TP column

CSLAP$TN_TP<-CSLAP$TN/CSLAP$TP

Check for missing values

result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## The following suspected missing value codes enter as regular values: Inf, NaN.
CSLAP$TN_TP[CSLAP$TN_TP == "Inf"] <- NA
CSLAP$TN_TP[CSLAP$TN_TP == "NaN"] <- NA

result <- lapply(CSLAP, identifyMissing)
result
## $LakeID
## No problems found.
## $Lake_Name
## No problems found.
## $Proj_Code
## No problems found.
## $Sample_Name
## No problems found.
## $Sample_Date
## No problems found.
## $Data_Provider
## No problems found.
## $Info_Type
## No problems found.
## $LocationID
## No problems found.
## $Latitude
## No problems found.
## $Longitude
## No problems found.
## $Sample_Depth
## No problems found.
## $Secchi_Depth
## No problems found.
## $DO
## No problems found.
## $Bottom_DO
## No problems found.
## $pH
## No problems found.
## $TP
## No problems found.
## $TDP
## No problems found.
## $Conductance
## No problems found.
## $Temp_Air
## No problems found.
## $Temp_Water
## No problems found.
## $TN
## No problems found.
## $TDN
## No problems found.
## $Ammonia
## No problems found.
## $NOx
## No problems found.
## $Nitrate
## No problems found.
## $Nitrite
## No problems found.
## $True_Color
## No problems found.
## $Chloride
## No problems found.
## $Calcium
## No problems found.
## $Chl.a_FP
## No problems found.
## $Extracted_Chl.a
## No problems found.
## $UFI_SBK_FP_Chl.a
## No problems found.
## $UFI_SBK_FP_BGA
## No problems found.
## $UFI_SBK_Microcystin
## No problems found.
## $ESF_FP_Chl.a
## No problems found.
## $ESF_FP_BGA
## No problems found.
## $ESF_Microcystin
## No problems found.
## $HAB_Status
## No problems found.
## $Alkalinity_CaCO3
## No problems found.
## $DOC
## No problems found.
## $UV_254
## No problems found.
## $Iron
## No problems found.
## $Manganese
## No problems found.
## $Arsenic
## No problems found.
## $QA
## No problems found.
## $QB
## No problems found.
## $QC
## No problems found.
## $Sample_Year
## No problems found.
## $Sample_Month
## No problems found.
## $TN_TP
## No problems found.

Read in and merge mean depth, CA:SA, dreissenids by year, and %ag

Dreissenids<-read.csv("./raw_data/Dreissenid_Status_byYear.csv", header=TRUE)
Dreissenids$Lake_Name<-as.character(Dreissenids$Lake_Name) 
Dreissenids[200:204,2] <- rep("Hadlock Pond",5)
Dreissenids$Lake_Name<-as.factor(Dreissenids$Lake_Name)

CASA_MD<-read.csv("./raw_data/CASA_Mean_Depth.csv", header=TRUE)
CASA_MD$Lake_Name<-as.character(CASA_MD$Lake_Name) 
CASA_MD[27,2] <- "Hadlock Pond"
CASA_MD$Lake_Name<-as.factor(CASA_MD$Lake_Name)

Ag<-read.csv("./raw_data/Percent Ag Cover.csv", header=TRUE)
Ag$Lake_Name<-as.character(Ag$Lake_Name) 
Ag[27,1] <- "Hadlock Pond"
Ag$Lake_Name<-as.factor(Ag$Lake_Name)

CSLAP<-merge(CSLAP, Dreissenids, by=c("LakeID", "Lake_Name", "Sample_Year"), all.x =TRUE)
CSLAP<-merge(CSLAP, CASA_MD, by=c("LakeID", "Lake_Name"), all.x=TRUE)
CSLAP<-merge(CSLAP, Ag, by="Lake_Name", all.x=TRUE)

Now we have a clean, full dataset. Next, we need to create the reduced dataset using propensity score matching

Create Reduced Dataset

Read in lake data

Lake_List<-read.csv("./raw_data/LakeList.csv", header=TRUE)
Lake_List<-merge(Lake_List, Ag, by="Lake_Name", na.rm=FALSE, all.x=TRUE) #merge %ag
Lake_List<-Lake_List[Lake_List$LakeID != "0403RUS0146",] #Remove Lake Rushford

Propensity Score Estimation

m_ps <- glm(Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag,
            family = binomial(), data = Lake_List)
summary(m_ps)
## 
## Call:
## glm(formula = Dreissenids ~ Mean_Depth_m + CA.SA + Percent_Ag, 
##     family = binomial(), data = Lake_List)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2155  -0.4089  -0.3318  -0.3144   2.4219  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  -3.113660   0.954012  -3.264  0.00110 **
## Mean_Depth_m  0.019552   0.131026   0.149  0.88138   
## CA.SA         0.005799   0.010392   0.558  0.57684   
## Percent_Ag    0.068900   0.026159   2.634  0.00844 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 49.008  on 66  degrees of freedom
## Residual deviance: 40.860  on 63  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 48.86
## 
## Number of Fisher Scoring iterations: 5
prs_df <- data.frame(pr_score = predict(m_ps, type = "response"),
                     Dreissenids = m_ps$model$Dreissenids)
head(prs_df)
##     pr_score Dreissenids
## 1 0.04688800           0
## 2 0.04809539           0
## 3 0.05508699           0
## 4 0.16993506           0
## 5 0.05479752           0
## 6 0.07388909           0
labs <- paste("Invasion Status:", c("Invaded", "Uninvaded"))
prs_df %>%
  mutate(Dreissenids = ifelse(Dreissenids == 1, labs[1], labs[2])) %>%
  ggplot(aes(x = pr_score)) +
  geom_histogram(color = "white") +
  facet_wrap(~Dreissenids) +
  xlab("Probability of being invaded") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

lakes_nomiss <- Lake_List %>%  # MatchIt does not allow missing values
  dplyr::select(CA.SA, Mean_Depth_m, Percent_Ag, LakeID, Lake_Name, Dreissenids) %>%
  na.omit()

mod_match <- matchit(Dreissenids ~ CA.SA + Mean_Depth_m + Percent_Ag,
                     method = "nearest", data = lakes_nomiss)

df_match <- match.data(mod_match)

Extract observations that come from lakes in the reduced set to create new, clean reduced dataset

redLakes<-df_match[,c(4,6)]

redCSLAP<-merge(CSLAP, redLakes, by="LakeID")

redCSLAP<-redCSLAP[,1:54] #removed redundant Dreissenids column

colnames(redCSLAP)[51] <- "Dreissenids"

Data Summaries and Histograms

Global Dataset

Data Summaries

#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk 
CSLAP2<-CSLAP[, c(8, 13, 17, 21, 22, 28, 30, 32, 36, 37, 38, 50:54)]
meltCSLAP <- melt(CSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)

#summarize each variable split by invasion status and info
VariableSummaries<-ddply(meltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
VariableSummaries
##    Dreissenids Info_Type        variable         Mean     Median        Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850  0.0000000
## 2      Invaded        BS      Temp_Water 1.511196e+01  15.000000  5.0000000
## 3      Invaded        BS           CA.SA 2.788857e+01   6.507353  4.4873502
## 4      Invaded        BS    Mean_Depth_m 6.232740e+00   6.200000  3.7000000
## 5      Invaded        BS      Percent_Ag 2.046619e+01  20.000000  1.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000  1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000  0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000 10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810  0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000  0.5000000
## 11     Invaded        OW         Calcium 2.842750e+01  24.450000 14.3000000
## 12     Invaded        OW Extracted_Chl.a 2.372222e+00   1.900000  0.0500000
## 13     Invaded        OW    ESF_FP_Chl.a 1.945223e+00   1.600000  0.0000000
## 14     Invaded        OW      ESF_FP_BGA 4.813312e-01   0.100000  0.0000000
## 15     Invaded        OW ESF_Microcystin 6.437536e-01   0.440000  0.3285191
## 16     Invaded        OW           TN_TP 4.676542e+01  31.182701  0.0000000
## 17     Invaded        OW           CA.SA 2.823056e+01  10.780370  4.4873502
## 18     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000  3.7000000
## 19     Invaded        OW      Percent_Ag 1.934462e+01  20.000000  1.0000000
## 20     Invaded        SB    Secchi_Depth 2.750000e+00   2.750000  2.7500000
## 21     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000  0.2000000
## 22     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000  0.0000000
## 23     Invaded        SB ESF_Microcystin 7.493377e+01  43.840108  0.8346926
## 24     Invaded        SB           CA.SA 1.424389e+01   4.807692  4.6535893
## 25     Invaded        SB    Mean_Depth_m 5.713333e+00   7.100000  3.7000000
## 26     Invaded        SB      Percent_Ag 3.256667e+01  21.500000 20.0000000
## 27   Uninvaded        BS    Secchi_Depth 5.383333e+00   5.350000  5.1000000
## 28   Uninvaded        BS              TP 5.579272e-02   0.015000  0.0000000
## 29   Uninvaded        BS      Temp_Water 1.347582e+01  13.000000  3.0000000
## 30   Uninvaded        BS              TN 3.655000e-01   0.365500  0.3580000
## 31   Uninvaded        BS      True_Color 1.300000e+01  13.000000  9.0000000
## 32   Uninvaded        BS Extracted_Chl.a 1.600000e+00   1.600000  1.6000000
## 33   Uninvaded        BS    ESF_FP_Chl.a 5.300000e-01   0.570000  0.3900000
## 34   Uninvaded        BS      ESF_FP_BGA 0.000000e+00   0.000000  0.0000000
## 35   Uninvaded        BS           TN_TP 1.234775e+01  12.347749  8.1978022
## 36   Uninvaded        BS           CA.SA 1.816096e+01   7.732739  0.7403433
## 37   Uninvaded        BS    Mean_Depth_m 7.083978e+00   5.500000  2.6000000
## 38   Uninvaded        BS      Percent_Ag 8.507919e+00   1.000000  0.0000000
## 39   Uninvaded        OW    Secchi_Depth 4.075063e+00   3.800000  1.0000000
## 40   Uninvaded        OW              TP 1.207152e-02   0.009700  0.0000000
## 41   Uninvaded        OW      Temp_Water 2.271985e+01  23.000000  7.0000000
## 42   Uninvaded        OW              TN 3.831279e-01   0.325000  0.0000000
## 43   Uninvaded        OW      True_Color 1.578847e+01  13.000000  0.5000000
## 44   Uninvaded        OW         Calcium 1.104227e+01   8.155000  1.5300000
## 45   Uninvaded        OW Extracted_Chl.a 3.377997e+00   2.400000  0.0500000
## 46   Uninvaded        OW    ESF_FP_Chl.a 1.007138e+01   1.700000  0.0000000
## 47   Uninvaded        OW      ESF_FP_BGA 2.181487e+00   0.090000  0.0000000
## 48   Uninvaded        OW ESF_Microcystin 1.188658e+00   0.407566  0.1600000
## 49   Uninvaded        OW           TN_TP 4.209054e+01  32.444444  1.0840999
## 50   Uninvaded        OW           CA.SA 1.883358e+01   8.945022  0.7403433
## 51   Uninvaded        OW    Mean_Depth_m 5.966943e+00   4.700000  0.4000000
## 52   Uninvaded        OW      Percent_Ag 6.657864e+00   1.000000  0.0000000
## 53   Uninvaded        RT           CA.SA 8.140788e+01  81.407877 81.4078774
## 54   Uninvaded        RT    Mean_Depth_m 1.700000e+01  17.000000 17.0000000
## 55   Uninvaded        RT      Percent_Ag 1.000000e+00   1.000000  1.0000000
## 56   Uninvaded        SB    Secchi_Depth 3.600000e+00   2.500000  1.7000000
## 57   Uninvaded        SB    ESF_FP_Chl.a 2.791198e+03 171.710000  0.2800000
## 58   Uninvaded        SB      ESF_FP_BGA 2.309770e+03  26.870000  0.0000000
## 59   Uninvaded        SB ESF_Microcystin 2.086218e+03 100.355000  0.8100000
## 60   Uninvaded        SB           CA.SA 9.269072e+00   6.057692  0.7403433
## 61   Uninvaded        SB    Mean_Depth_m 5.083735e+00   4.600000  0.4000000
## 62   Uninvaded        SB      Percent_Ag 9.583851e+00   4.000000  0.0000000
##            Max           SD
## 1      0.37420 5.324365e-02
## 2     25.00000 5.298994e+00
## 3    189.85507 5.436477e+01
## 4      9.90000 1.727590e+00
## 5     48.00000 1.550414e+01
## 6      9.10000 1.395980e+00
## 7      0.06640 7.093941e-03
## 8     36.00000 2.860583e+00
## 9      4.12400 4.399775e-01
## 10    55.00000 9.245752e+00
## 11    57.80000 1.133176e+01
## 12    13.30000 1.839573e+00
## 13    12.15000 1.634573e+00
## 14    10.36000 9.835983e-01
## 15     1.84000 5.345261e-01
## 16   240.59524 4.110511e+01
## 17   189.85507 5.391221e+01
## 18     9.90000 1.697457e+00
## 19    48.00000 1.529962e+01
## 20     2.75000           NA
## 21 31507.50000 6.226951e+03
## 22 31507.50000 6.221888e+03
## 23   361.24104 9.244751e+01
## 24   189.85507 4.064373e+01
## 25     9.90000 1.901513e+00
## 26    48.00000 1.359881e+01
## 27     5.70000 3.013857e-01
## 28     3.90960 1.657730e-01
## 29    29.00000 5.807561e+00
## 30     0.37300 1.060660e-02
## 31    17.00000 4.000000e+00
## 32     1.60000           NA
## 33     0.63000 1.249000e-01
## 34     0.00000 0.000000e+00
## 35    16.49770 5.868911e+00
## 36   210.42471 3.484989e+01
## 37    21.30000 4.142202e+00
## 38    44.00000 1.271529e+01
## 39    13.60000 1.697343e+00
## 40     0.21590 1.094758e-02
## 41    34.00000 3.428063e+00
## 42    12.93200 4.134485e-01
## 43   100.00000 1.073302e+01
## 44    58.30000 9.711479e+00
## 45    63.20000 3.682886e+00
## 46  3438.50000 1.476402e+02
## 47  2564.00000 5.420867e+01
## 48    52.86975 5.577302e+00
## 49  7936.00000 1.705462e+02
## 50   210.42471 3.385397e+01
## 51    21.30000 4.112813e+00
## 52    44.00000 1.106120e+01
## 53    81.40788           NA
## 54    17.00000           NA
## 55     1.00000           NA
## 56     6.60000 2.628688e+00
## 57 74781.25000 9.683792e+03
## 58 74781.25000 9.173332e+03
## 59 39757.12502 7.757750e+03
## 60    54.94505 7.784528e+00
## 61    13.00000 2.380611e+00
## 62    41.00000 1.102359e+01
#I will pull out only the values that I need to make copying to a word document "easy"
VariableSummaries[c(1, 6:11,15:18, 20:22, 27, 38:34, 47:50, 55:57),]
##    Dreissenids Info_Type        variable         Mean     Median        Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850  0.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000  1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000  0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000 10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810  0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000  0.5000000
## 11     Invaded        OW         Calcium 2.842750e+01  24.450000 14.3000000
## 15     Invaded        OW ESF_Microcystin 6.437536e-01   0.440000  0.3285191
## 16     Invaded        OW           TN_TP 4.676542e+01  31.182701  0.0000000
## 17     Invaded        OW           CA.SA 2.823056e+01  10.780370  4.4873502
## 18     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000  3.7000000
## 20     Invaded        SB    Secchi_Depth 2.750000e+00   2.750000  2.7500000
## 21     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000  0.2000000
## 22     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000  0.0000000
## 27   Uninvaded        BS    Secchi_Depth 5.383333e+00   5.350000  5.1000000
## 38   Uninvaded        BS      Percent_Ag 8.507919e+00   1.000000  0.0000000
## 37   Uninvaded        BS    Mean_Depth_m 7.083978e+00   5.500000  2.6000000
## 36   Uninvaded        BS           CA.SA 1.816096e+01   7.732739  0.7403433
## 35   Uninvaded        BS           TN_TP 1.234775e+01  12.347749  8.1978022
## 34   Uninvaded        BS      ESF_FP_BGA 0.000000e+00   0.000000  0.0000000
## 47   Uninvaded        OW      ESF_FP_BGA 2.181487e+00   0.090000  0.0000000
## 48   Uninvaded        OW ESF_Microcystin 1.188658e+00   0.407566  0.1600000
## 49   Uninvaded        OW           TN_TP 4.209054e+01  32.444444  1.0840999
## 50   Uninvaded        OW           CA.SA 1.883358e+01   8.945022  0.7403433
## 55   Uninvaded        RT      Percent_Ag 1.000000e+00   1.000000  1.0000000
## 56   Uninvaded        SB    Secchi_Depth 3.600000e+00   2.500000  1.7000000
## 57   Uninvaded        SB    ESF_FP_Chl.a 2.791198e+03 171.710000  0.2800000
##            Max           SD
## 1      0.37420 5.324365e-02
## 6      9.10000 1.395980e+00
## 7      0.06640 7.093941e-03
## 8     36.00000 2.860583e+00
## 9      4.12400 4.399775e-01
## 10    55.00000 9.245752e+00
## 11    57.80000 1.133176e+01
## 15     1.84000 5.345261e-01
## 16   240.59524 4.110511e+01
## 17   189.85507 5.391221e+01
## 18     9.90000 1.697457e+00
## 20     2.75000           NA
## 21 31507.50000 6.226951e+03
## 22 31507.50000 6.221888e+03
## 27     5.70000 3.013857e-01
## 38    44.00000 1.271529e+01
## 37    21.30000 4.142202e+00
## 36   210.42471 3.484989e+01
## 35    16.49770 5.868911e+00
## 34     0.00000 0.000000e+00
## 47  2564.00000 5.420867e+01
## 48    52.86975 5.577302e+00
## 49  7936.00000 1.705462e+02
## 50   210.42471 3.385397e+01
## 55     1.00000           NA
## 56     6.60000 2.628688e+00
## 57 74781.25000 9.683792e+03
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(VariableSummaries, "./output_data/VariableSummaries-June2020.csv")
#melt Lake List data frame by Dreissenids so we can summarize all our variables of interest in one code chunk 
meltLakes <- melt(Lake_List[,c(2:6)], id=c("LakeID", "Dreissenids"), na.rm=TRUE)

#summarize each variable split by invasion status and info
LakeSummaries<-ddply(meltLakes, c("Dreissenids", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
LakeSummaries
##   Dreissenids     variable      Mean    Median       Min      Max        SD
## 1           0        CA.SA 16.590663  8.681141 0.7403433 210.4247 30.550112
## 2           0 Mean_Depth_m  5.750000  4.700000 0.4000000  21.3000  3.859152
## 3           0   Percent_Ag  6.357627  1.000000 0.0000000  44.0000 11.095405
## 4           1        CA.SA 32.575834  8.643862 4.4873502 189.8551 63.897069
## 5           1 Mean_Depth_m  6.325000  6.400000 3.7000000   9.9000  1.918146
## 6           1   Percent_Ag 21.000000 20.000000 1.0000000  48.0000 15.874508
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(LakeSummaries, "./output_data/LakeSummaries.csv")
#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk 
CSLAP3<-CSLAP[CSLAP$Info_Type == "OW", c(1, 13, 17, 32)]
CSLAP3$TP <- CSLAP3$TP*1000
meltTrophic <- melt(CSLAP3, id="Lake_Name", na.rm=TRUE)

#summarize each variable split by invasion status and info
TrophicSummaries<-ddply(meltTrophic, c("Lake_Name","variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
TrophicSummaries
##                   Lake_Name        variable       Mean  Median    Min     Max
## 1                Augur Lake    Secchi_Depth  3.0500000  2.8250  1.650   4.850
## 2                Augur Lake              TP 21.1951220 20.3000 12.600  37.300
## 3                Augur Lake Extracted_Chl.a  7.6146341  6.5000  1.500  34.200
## 4              Babcock Lake    Secchi_Depth  4.2043750  4.2250  2.030   6.000
## 5              Babcock Lake              TP  9.3687500  9.0500  6.500  17.800
## 6              Babcock Lake Extracted_Chl.a  3.1680851  2.9000  0.900   8.000
## 7           Big Bowman Pond    Secchi_Depth  2.1462500  2.0875  1.350   3.800
## 8           Big Bowman Pond              TP 14.2400000 12.8500  8.800  24.700
## 9           Big Bowman Pond Extracted_Chl.a  7.4794872  6.6000  1.500  19.200
## 10  Bradley Brook Reservoir    Secchi_Depth  6.3893750  6.1500  4.550   8.700
## 11  Bradley Brook Reservoir              TP 10.0450000  9.4000  6.500  19.900
## 12  Bradley Brook Reservoir Extracted_Chl.a  1.8250000  1.8000  0.100   4.400
## 13         Brantingham Lake    Secchi_Depth  3.2543750  3.3250  1.700   4.775
## 14         Brantingham Lake              TP  9.2325000  9.0500  5.300  15.000
## 15         Brantingham Lake Extracted_Chl.a  5.4425000  4.6500  0.300  14.400
## 16        Burden Third Lake    Secchi_Depth  4.4702703  4.3000  1.600   7.050
## 17        Burden Third Lake              TP 12.5736842  9.9500  6.700  75.500
## 18        Burden Third Lake Extracted_Chl.a  3.7162162  2.5000  0.500  17.800
## 19              Canada Lake    Secchi_Depth  4.1956522  4.2000  2.950   5.800
## 20              Canada Lake              TP  5.0739130  4.7000  3.300   8.000
## 21              Canada Lake Extracted_Chl.a  2.0333333  1.9000  0.800   3.900
## 22           Cazenovia Lake    Secchi_Depth  4.1708974  4.1500  1.620   7.000
## 23           Cazenovia Lake              TP 19.2000000 17.1000 10.600  42.100
## 24           Cazenovia Lake Extracted_Chl.a  3.0416667  2.3000  0.900   9.300
## 25            Chenango Lake    Secchi_Depth  4.8713542  4.7250  3.000   6.950
## 26            Chenango Lake              TP 10.6291667 10.0000  4.800  28.400
## 27            Chenango Lake Extracted_Chl.a  2.4750000  2.0500  0.500   7.800
## 28              Craine Lake    Secchi_Depth  3.1841026  2.7000  1.700   6.650
## 29              Craine Lake              TP 13.1300000 10.6000  7.000  51.900
## 30              Craine Lake Extracted_Chl.a  1.9287500  1.8500  0.050   4.800
## 31             Crooked Lake    Secchi_Depth  3.3519737  3.3000  2.050   4.650
## 32             Crooked Lake              TP 15.1275000 14.8000  7.700  21.500
## 33             Crooked Lake Extracted_Chl.a  6.1457143  4.5000  1.300  16.700
## 34                Cuba Lake    Secchi_Depth  3.0702128  2.9500  1.400   5.850
## 35                Cuba Lake              TP 19.2680851 17.4000  8.900  46.900
## 36                Cuba Lake Extracted_Chl.a  7.7195652  6.6000  0.500  49.100
## 37      De Ruyter Reservoir    Secchi_Depth  4.9765625  4.7250  3.250   7.700
## 38      De Ruyter Reservoir              TP 14.4541667 13.1000  3.600  62.900
## 39      De Ruyter Reservoir Extracted_Chl.a  2.1645833  2.0000  0.400   4.400
## 40               Eagle Lake    Secchi_Depth  6.8555556  6.9000  2.550  10.550
## 41               Eagle Lake              TP  7.8818182  6.3000  4.000  32.700
## 42               Eagle Lake Extracted_Chl.a  1.0857143  1.1000  0.300   2.600
## 43               Eagle Pond    Secchi_Depth  1.4178788  1.4000  1.200   1.800
## 44               Eagle Pond              TP 17.3575000 12.3000  6.800  82.400
## 45               Eagle Pond Extracted_Chl.a  3.4712500  1.9500  0.050  32.500
## 46         East Caroga Lake    Secchi_Depth  3.6425532  3.7000  2.450   5.000
## 47         East Caroga Lake              TP 17.9000000  9.0000  5.100 110.100
## 48         East Caroga Lake Extracted_Chl.a  3.5119565  2.4000  0.050  31.500
## 49    Eaton Brook Reservoir    Secchi_Depth  5.5658537  5.5000  2.650   9.100
## 50    Eaton Brook Reservoir              TP 11.0024390  9.2000  5.500  66.400
## 51    Eaton Brook Reservoir Extracted_Chl.a  1.4961538  1.3000  0.050   3.600
## 52                Echo Lake    Secchi_Depth  2.6585938  2.6250  1.650   4.000
## 53                Echo Lake              TP 13.2000000 11.6000  8.400  31.400
## 54                Echo Lake Extracted_Chl.a  4.0209677  2.6000  0.050  17.400
## 55               Efner Lake    Secchi_Depth  3.9700000  3.8000  2.700   6.700
## 56               Efner Lake              TP  6.8725000  6.3000  3.300  15.900
## 57               Efner Lake Extracted_Chl.a  1.9333333  1.5000  0.400   7.800
## 58              Forest Lake    Secchi_Depth  2.4459091  2.4000  1.700   3.130
## 59              Forest Lake              TP 11.6939394 11.6000  7.600  21.100
## 60              Forest Lake Extracted_Chl.a  4.2242424  3.4000  0.600  16.300
## 61             Friends Lake    Secchi_Depth  4.4312500  4.3500  3.100   6.950
## 62             Friends Lake              TP  7.7550000  7.5000  4.100  13.600
## 63             Friends Lake Extracted_Chl.a  3.2282051  2.6000  0.200  23.400
## 64              Galway Lake    Secchi_Depth  3.3627660  3.3500  2.000   4.900
## 65              Galway Lake              TP 18.4680851 10.7000  7.300 152.200
## 66              Galway Lake Extracted_Chl.a  3.1108696  3.2000  0.300   6.800
## 67         Geneganslet Lake    Secchi_Depth  3.0876974  2.9250  2.300   5.000
## 68         Geneganslet Lake              TP 10.7600000  9.4000  5.500  47.800
## 69         Geneganslet Lake Extracted_Chl.a  2.8900000  2.2500  0.300  18.800
## 70                Glen Lake    Secchi_Depth  4.0795455  4.0000  2.600   6.000
## 71                Glen Lake              TP  8.1090909  7.5000  4.300  16.000
## 72                Glen Lake Extracted_Chl.a  2.5976744  1.8000  0.600  13.300
## 73               Grass Lake    Secchi_Depth  3.2242424  3.0500  2.050   5.400
## 74               Grass Lake              TP 16.0774194 15.5000  0.000  34.000
## 75               Grass Lake Extracted_Chl.a  2.6342857  1.3000  0.050  12.900
## 76            Guilford Lake    Secchi_Depth  3.1141304  3.1750  1.000   5.800
## 77            Guilford Lake              TP 22.9282609 14.9500  8.400  99.200
## 78            Guilford Lake Extracted_Chl.a  6.6322222  3.6000  0.050  24.600
## 79             Hadlock Pond    Secchi_Depth  4.7918750  4.7250  3.250   8.350
## 80             Hadlock Pond              TP  7.7250000  7.3500  4.800  12.200
## 81             Hadlock Pond Extracted_Chl.a  2.7925000  2.3500  0.900   6.000
## 82               Hatch Lake    Secchi_Depth  5.0307692  4.8000  2.400   8.050
## 83               Hatch Lake              TP 12.0575000 12.3500  5.600  16.000
## 84               Hatch Lake Extracted_Chl.a  2.9150000  2.7000  0.600  10.300
## 85                Hunt Lake    Secchi_Depth  3.6850000  3.7500  2.500   6.000
## 86                Hunt Lake              TP  9.7947368  7.3000  4.600  42.500
## 87                Hunt Lake Extracted_Chl.a  2.4421053  2.2000  1.000   5.600
## 88              Indian Lake    Secchi_Depth  6.3558824  6.6500  3.550   7.200
## 89              Indian Lake              TP  9.5611111  9.3500  6.400  14.300
## 90              Indian Lake Extracted_Chl.a  0.8705882  0.9000  0.100   2.000
## 91               Jenny Lake    Secchi_Depth  3.7428378  3.6500  2.300   6.550
## 92               Jenny Lake              TP  6.9974359  6.6000  4.900  11.000
## 93               Jenny Lake Extracted_Chl.a  3.2000000  2.8000  0.300  13.900
## 94              Kasoag Lake    Secchi_Depth  3.7910256  3.7000  2.800   4.900
## 95              Kasoag Lake              TP 13.2820513 12.8000  6.000  33.100
## 96              Kasoag Lake Extracted_Chl.a  5.3868421  2.6000  0.200  63.200
## 97            Lake Anawanda    Secchi_Depth  5.9383333  6.0000  3.100  13.600
## 98            Lake Anawanda              TP  6.9217391  6.2500  4.000  16.100
## 99            Lake Anawanda Extracted_Chl.a  1.9913043  1.5000  0.600   6.700
## 100          Lake Bonaparte    Secchi_Depth  4.3622222  4.4000  3.250   5.800
## 101          Lake Bonaparte              TP  9.4553191  7.7000  5.700  52.700
## 102          Lake Bonaparte Extracted_Chl.a  2.0340426  1.9000  0.100   5.200
## 103           Lake Devenoge    Secchi_Depth  3.7076923  3.5500  2.900   4.900
## 104           Lake Devenoge              TP  8.3000000  8.6500  3.400  12.500
## 105           Lake Devenoge Extracted_Chl.a  2.9333333  2.5000  0.500   7.200
## 106             Lake Forest    Secchi_Depth  3.3352564  3.2500  2.150   4.700
## 107             Lake Forest              TP 13.0102564  8.5000  4.900  55.000
## 108             Lake Forest Extracted_Chl.a  5.1205128  4.0000  1.500  42.700
## 109       Lake of the Woods    Secchi_Depth  5.4979167  5.4000  4.050   7.200
## 110       Lake of the Woods              TP  7.8458333  7.2500  5.100  20.400
## 111       Lake of the Woods Extracted_Chl.a  1.0708333  1.1000  0.100   2.000
## 112            Lake Petonia    Secchi_Depth  5.0825000  5.3500  2.150   7.650
## 113            Lake Petonia              TP  9.3638298  8.8000  4.200  19.900
## 114            Lake Petonia Extracted_Chl.a  1.8304348  1.4500  0.200   8.400
## 115             Lake Placid    Secchi_Depth  7.7070732  7.4000  5.130  10.650
## 116             Lake Placid              TP  4.5440000  4.2000  3.000  11.500
## 117             Lake Placid Extracted_Chl.a  1.8659574  1.6000  0.600   6.300
## 118           Lake Pleasant    Secchi_Depth  4.5456522  4.6000  3.600   5.750
## 119           Lake Pleasant              TP  7.0125000  6.9500  5.200   9.300
## 120           Lake Pleasant Extracted_Chl.a  2.5083333  2.3000  0.800   8.000
## 121          Lake Sunnyside    Secchi_Depth  6.1452564  6.3000  4.300   7.280
## 122          Lake Sunnyside              TP 12.9025641 12.0000  8.100  23.200
## 123          Lake Sunnyside Extracted_Chl.a  2.1631579  1.4500  0.300  13.200
## 124            Lincoln Pond    Secchi_Depth  4.1883333  4.1750  2.800   5.650
## 125            Lincoln Pond              TP  7.4533333  6.6500  4.800  19.800
## 126            Lincoln Pond Extracted_Chl.a  1.8533333  1.7500  0.800   4.700
## 127        Little Long Pond    Secchi_Depth  3.0212766  3.1000  1.250   4.350
## 128        Little Long Pond              TP 31.9021277 19.3000 10.600 215.900
## 129        Little Long Pond Extracted_Chl.a  1.9891489  1.6000  0.300   9.100
## 130               Loon Lake    Secchi_Depth  4.0656250  4.0000  2.250   5.300
## 131               Loon Lake              TP 12.1666667 11.5000  7.000  18.100
## 132               Loon Lake Extracted_Chl.a  2.6166667  1.1500  0.050  13.500
## 133             Lorton Lake    Secchi_Depth  1.9393750  2.1000  1.100   2.700
## 134             Lorton Lake              TP 19.9230769 16.6000 12.500  77.200
## 135             Lorton Lake Extracted_Chl.a  5.6473684  3.8500  1.100  27.700
## 136           Millsite Lake    Secchi_Depth  7.5055556  7.5500  5.150   9.650
## 137           Millsite Lake              TP  8.6086957  7.8500  4.500  20.600
## 138           Millsite Lake Extracted_Chl.a  1.5622222  1.3000  0.300   4.400
## 139             Oquaga Lake    Secchi_Depth  8.1382979  7.9000  4.850  11.550
## 140             Oquaga Lake              TP  5.3872340  4.8000  3.200  14.600
## 141             Oquaga Lake Extracted_Chl.a  0.9467391  0.8500  0.050   2.600
## 142              Otter Lake    Secchi_Depth  1.5046875  1.5000  1.300   1.750
## 143              Otter Lake              TP 14.0625000 13.3000  8.000  30.100
## 144              Otter Lake Extracted_Chl.a  4.5193548  4.6000  1.100  10.100
## 145            Panther Lake    Secchi_Depth  2.1087500  1.9750  1.250   5.000
## 146            Panther Lake              TP 15.4400000 14.0000  7.700  58.000
## 147            Panther Lake Extracted_Chl.a  5.7465517  5.4000  0.050  12.500
## 148               Peck Lake    Secchi_Depth  3.4047794  3.2500  2.750   4.500
## 149               Peck Lake              TP  6.5475000  6.4000  0.000   9.900
## 150               Peck Lake Extracted_Chl.a  3.0138889  2.5000  1.300   9.100
## 151           Pleasant Lake    Secchi_Depth  4.2594595  4.3000  3.100   5.200
## 152           Pleasant Lake              TP  7.3864865  7.1000  4.400  18.700
## 153           Pleasant Lake Extracted_Chl.a  2.2305556  2.1000  0.800   3.900
## 154            Queechy Lake    Secchi_Depth  5.6402174  5.5875  3.150   9.050
## 155            Queechy Lake              TP  8.7350000  8.2500  5.300  15.000
## 156            Queechy Lake Extracted_Chl.a  1.9372340  1.4000  0.050  17.500
## 157      Roaring Brook Lake    Secchi_Depth  2.7923077  2.8000  1.500   4.300
## 158      Roaring Brook Lake              TP 12.9700000 12.5000  8.500  21.500
## 159      Roaring Brook Lake Extracted_Chl.a  3.6910256  3.3000  0.050   9.500
## 160              Round Pond    Secchi_Depth  2.3926471  2.4000  1.600   3.850
## 161              Round Pond              TP 10.9529412 10.9500  6.000  20.400
## 162              Round Pond Extracted_Chl.a  3.6787879  2.7000  0.500  10.100
## 163          Sacandaga Lake    Secchi_Depth  4.4875000  4.5250  3.450   5.450
## 164          Sacandaga Lake              TP  9.0125000  8.4000  6.000  17.500
## 165          Sacandaga Lake Extracted_Chl.a  3.5500000  3.3500  1.800   8.300
## 166            Schroon Lake    Secchi_Depth  3.9670000  3.8000  2.450   7.550
## 167            Schroon Lake              TP  8.4921053  6.5000  0.000  51.600
## 168            Schroon Lake Extracted_Chl.a  1.6392157  1.6000  0.050   6.500
## 169            Sepasco Lake    Secchi_Depth  3.1009091  3.0500  1.725   4.700
## 170            Sepasco Lake              TP 17.6000000 16.4500 10.400  28.800
## 171            Sepasco Lake Extracted_Chl.a  5.1500000  4.3000  0.100  15.000
## 172             Silver Lake    Secchi_Depth  3.9240909  4.0000  2.300   4.900
## 173             Silver Lake              TP  9.6208333  8.7000  5.900  18.300
## 174             Silver Lake Extracted_Chl.a  4.7978723  3.9000  0.900  23.200
## 175           Somerset Lake    Secchi_Depth  4.8375000  4.7250  2.950   7.900
## 176           Somerset Lake              TP 10.1212766  9.9000  3.300  15.500
## 177           Somerset Lake Extracted_Chl.a  2.2085106  1.7000  0.200   9.700
## 178               Song Lake    Secchi_Depth  2.9956757  2.9000  1.850   4.750
## 179               Song Lake              TP 16.4662500 16.0500  0.450  27.600
## 180               Song Lake Extracted_Chl.a  3.9850000  3.9500  0.100   8.200
## 181             Spring Lake    Secchi_Depth  2.5145313  2.3750  1.750   4.700
## 182             Spring Lake              TP 12.5375000  9.1000  5.700  43.100
## 183             Spring Lake Extracted_Chl.a  1.8046875  1.3500  0.050  13.600
## 184            Taconic Lake    Secchi_Depth  4.3698077  4.3625  3.100   6.250
## 185            Taconic Lake              TP  6.4583333  6.1500  0.500  14.000
## 186            Taconic Lake Extracted_Chl.a  1.3437500  0.9500  0.050   6.300
## 187              Tully Lake    Secchi_Depth  4.1564103  3.9500  2.500   6.750
## 188              Tully Lake              TP 14.8000000 14.6000  0.000  26.100
## 189              Tully Lake Extracted_Chl.a  4.5382979  4.2000  0.200  10.600
## 190          Tuscarora Lake    Secchi_Depth  4.9038462  4.7000  1.100   8.800
## 191          Tuscarora Lake              TP 11.3846154 10.4000  4.700  21.200
## 192          Tuscarora Lake Extracted_Chl.a  2.5270270  1.6000  0.200  11.200
## 193             Tuxedo Lake    Secchi_Depth  3.1998649  2.9000  1.850   8.500
## 194             Tuxedo Lake              TP 20.4085714 17.3000  9.300  53.400
## 195             Tuxedo Lake Extracted_Chl.a  5.8783784  5.2000  0.800  17.900
## 196  Upper Little York Lake    Secchi_Depth  4.1833333  4.2500  2.000   6.900
## 197  Upper Little York Lake              TP 13.2421053 12.0000  8.200  28.700
## 198  Upper Little York Lake Extracted_Chl.a  2.1445946  1.8000  0.050   7.800
## 199               Warn Lake    Secchi_Depth  3.4479167  3.5000  2.000   5.500
## 200               Warn Lake              TP 11.7291667 10.8000  8.200  21.800
## 201               Warn Lake Extracted_Chl.a  4.3413043  4.3500  1.100  10.400
## 202             Yankee Lake    Secchi_Depth  3.2118333  3.1375  2.300   4.900
## 203             Yankee Lake              TP 18.1433333 13.8000  7.900  71.900
## 204             Yankee Lake Extracted_Chl.a  2.9533333  2.9500  0.500   5.800
##             SD
## 1    0.6883648
## 2    5.3848840
## 3    5.6200783
## 4    0.8409453
## 5    2.2221473
## 6    1.8086875
## 7    0.4742251
## 8    3.6338720
## 9    4.3201596
## 10   1.0150926
## 11   2.8473965
## 12   1.0637982
## 13   0.8669444
## 14   2.0662598
## 15   3.3057749
## 16   1.1980574
## 17  11.2707391
## 18   3.4994852
## 19   0.6966448
## 20   1.1780275
## 21   0.8121240
## 22   0.9247061
## 23   6.9674809
## 24   1.9895979
## 25   0.9619764
## 26   3.8966411
## 27   1.5408619
## 28   1.1590779
## 29   8.3979912
## 30   1.2660198
## 31   0.7415374
## 32   2.8976549
## 33   3.9799012
## 34   1.0072379
## 35   7.8332376
## 36   7.6223245
## 37   1.1783667
## 38   8.0962024
## 39   1.0115271
## 40   1.4249092
## 41   4.9807197
## 42   0.4760464
## 43   0.1218031
## 44  13.9405678
## 45   5.3671351
## 46   0.6035348
## 47  21.4132933
## 48   5.0588886
## 49   1.3442674
## 50   9.3562943
## 51   0.8437373
## 52   0.7245858
## 53   4.7263162
## 54   4.1490114
## 55   0.8134920
## 56   2.4990242
## 57   1.3387609
## 58   0.3904834
## 59   2.7789318
## 60   3.4314747
## 61   0.7747363
## 62   1.6903876
## 63   3.5227792
## 64   0.6941109
## 65  24.3472622
## 66   1.4914912
## 67   0.5359175
## 68   6.7842124
## 69   2.7956743
## 70   0.8651064
## 71   2.4129320
## 72   2.5160900
## 73   0.8318955
## 74   5.4548272
## 75   3.2150101
## 76   1.2988312
## 77  20.9236991
## 78   6.7364945
## 79   1.0187723
## 80   1.7512999
## 81   1.1899984
## 82   1.2954686
## 83   2.3474905
## 84   1.7700572
## 85   0.7298753
## 86   8.1229720
## 87   0.9027381
## 88   0.8882712
## 89   2.0875369
## 90   0.4700594
## 91   0.9240885
## 92   1.6329642
## 93   2.1791394
## 94   0.4805249
## 95   4.6929326
## 96  10.3917891
## 97   1.6701218
## 98   2.2867651
## 99   1.5665711
## 100  0.6353377
## 101  7.0592772
## 102  0.9571871
## 103  0.5949273
## 104  1.7522399
## 105  1.7642686
## 106  0.7114135
## 107 11.3368935
## 108  6.4346586
## 109  0.8060207
## 110  3.1076314
## 111  0.4920933
## 112  1.2116146
## 113  2.8993572
## 114  1.3817653
## 115  1.3064159
## 116  1.5009058
## 117  0.9805226
## 118  0.5291316
## 119  1.2173787
## 120  1.3454971
## 121  0.7997121
## 122  3.5050706
## 123  2.3340512
## 124  0.7198759
## 125  3.1453012
## 126  0.8041845
## 127  0.7120282
## 128 42.5465857
## 129  1.5502634
## 130  0.7792558
## 131  2.7482274
## 132  3.2264419
## 133  0.4226582
## 134 10.8740024
## 135  5.3553074
## 136  0.9994443
## 137  3.3943630
## 138  0.9291465
## 139  1.3407478
## 140  1.9935349
## 141  0.4770747
## 142  0.1042325
## 143  4.3524372
## 144  2.2103423
## 145  0.6609023
## 146  8.4030618
## 147  3.3743764
## 148  0.5073514
## 149  1.7044418
## 150  1.5410855
## 151  0.3847506
## 152  3.0193048
## 153  0.9070579
## 154  1.3272201
## 155  2.3522548
## 156  2.5445481
## 157  0.6876293
## 158  3.5667410
## 159  2.1537566
## 160  0.4978944
## 161  2.8161047
## 162  2.7855383
## 163  0.5385669
## 164  2.6613682
## 165  1.5644107
## 166  1.0259073
## 167  7.8465918
## 168  1.2380353
## 169  0.6527615
## 170  4.5682636
## 171  3.7012640
## 172  0.6156442
## 173  2.8252628
## 174  3.3867061
## 175  1.2873257
## 176  2.6767060
## 177  1.8325356
## 178  0.6829452
## 179  4.6445638
## 180  1.7963710
## 181  0.6230833
## 182  9.8011109
## 183  2.4191894
## 184  0.7279224
## 185  2.6992618
## 186  1.3052830
## 187  0.8593663
## 188  3.8001120
## 189  2.1971873
## 190  1.6079320
## 191  3.2621867
## 192  2.4271209
## 193  1.2173374
## 194  9.9304159
## 195  3.4664371
## 196  1.0516653
## 197  4.7798921
## 198  1.7950466
## 199  0.8097779
## 200  3.0558600
## 201  1.7694414
## 202  0.6553908
## 203 14.4030572
## 204  1.0163265
write.csv(TrophicSummaries, "./output_data/TrophicSummaries.csv")

TrophicMean<-reshape(TrophicSummaries[, c(1:3, 7)], idvar="Lake_Name", timevar="variable", times=c("Mean", "SD"), direction = "wide")


colnames(TrophicMean) <- c("Lake_Name", "Secchi_Mean", "Secchi_SD", "TP_Mean", "TP_SD", "Chlorophyll-a_Mean", "Chlorophyll-a_SD")
TrophicMean<-TrophicMean[,c(1,4, 5, 6, 7, 2, 3)]

write.csv(TrophicMean, "./output_data/TrophicMeans.csv")

Histograms

First we will split our dataset based on information type

OWCSLAP<-CSLAP[CSLAP$Info_Type == "OW",]
BSCSLAP<-CSLAP[CSLAP$Info_Type == "BS",]
SBCSLAP<-CSLAP[CSLAP$Info_Type == "SB",]

redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]

Open Water TP

ggplot(OWCSLAP, aes(x=TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 49 rows containing non-finite values (stat_bin).
## Warning: Removed 49 rows containing non-finite values (stat_density).

Open Water TN

ggplot(OWCSLAP, aes(x=TN)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 66 rows containing non-finite values (stat_bin).
## Warning: Removed 66 rows containing non-finite values (stat_density).

Open Water TN:TP

ggplot(OWCSLAP, aes(x=TN_TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
  scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).

Open Water Chlorophyll-a

ggplot(OWCSLAP, aes(x=Extracted_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).

Secchi Depth

ggplot(OWCSLAP, aes(x=Secchi_Depth)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 114 rows containing non-finite values (stat_bin).
## Warning: Removed 114 rows containing non-finite values (stat_density).

True Color

ggplot(OWCSLAP, aes(x=True_Color)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 95 rows containing non-finite values (stat_bin).
## Warning: Removed 95 rows containing non-finite values (stat_density).

Shoreline Bloom Chlorophyll

ggplot(SBCSLAP, aes(x=ESF_FP_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## Warning: Removed 2 rows containing non-finite values (stat_density).

Shoreline Bloom Microcystin

ggplot(SBCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 180 rows containing non-finite values (stat_bin).
## Warning: Removed 180 rows containing non-finite values (stat_density).

Open Water Microcystin

ggplot(OWCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open Water Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2634 rows containing non-finite values (stat_bin).
## Warning: Removed 2634 rows containing non-finite values (stat_density).

Mean Depth

#Quickly change factor level labels for Dreissenids
Lake_List[Lake_List$Dreissenids == 0, 5] <- rep("Uninvaded", 60)
Lake_List[Lake_List$Dreissenids == 1, 5] <- rep("Invaded", 8)
ggplot(Lake_List, aes(x=Mean_Depth_m)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Mean Depth", x="Mean depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

CA:SA

ggplot(Lake_List, aes(x=CA.SA)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Catchment Area to Surface Area Ratio", x="CA:SA", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Percent Agriculture

ggplot(Lake_List, aes(x=Percent_Ag)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Percent Agricultural Cover in Watershed", x="Agriculture (%)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (stat_bin).
## Warning: Removed 1 rows containing non-finite values (stat_density).

Reduced Dataset

Data Summaries

#melt data frame by Dreissenids and Info_type so we can summarize all our variables of interest in one code chunk 
redCSLAP2<-redCSLAP[, c(8, 13, 17, 21, 22, 28, 30, 32, 36, 37, 38, 50:54)]
redmeltCSLAP <- melt(redCSLAP2, id=c("Dreissenids", "Info_Type"), na.rm=TRUE)

#summarize each variable split by invasion status and info
redVariableSummaries<-ddply(redmeltCSLAP, c("Dreissenids", "Info_Type", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
redVariableSummaries
##    Dreissenids Info_Type        variable         Mean     Median         Min
## 1      Invaded        BS              TP 3.945294e-02   0.019850   0.0000000
## 2      Invaded        BS      Temp_Water 1.511196e+01  15.000000   5.0000000
## 3      Invaded        BS           CA.SA 2.788857e+01   6.507353   4.4873502
## 4      Invaded        BS    Mean_Depth_m 6.232740e+00   6.200000   3.7000000
## 5      Invaded        BS      Percent_Ag 2.046619e+01  20.000000   1.0000000
## 6      Invaded        OW    Secchi_Depth 4.536559e+00   4.400000   1.1000000
## 7      Invaded        OW              TP 1.276286e-02   0.011000   0.0036000
## 8      Invaded        OW      Temp_Water 2.330388e+01  24.000000  10.0000000
## 9      Invaded        OW              TN 5.232666e-01   0.368810   0.0000000
## 10     Invaded        OW      True_Color 1.295292e+01  11.000000   0.5000000
## 11     Invaded        OW         Calcium 2.842750e+01  24.450000  14.3000000
## 12     Invaded        OW Extracted_Chl.a 2.372222e+00   1.900000   0.0500000
## 13     Invaded        OW    ESF_FP_Chl.a 1.945223e+00   1.600000   0.0000000
## 14     Invaded        OW      ESF_FP_BGA 4.813312e-01   0.100000   0.0000000
## 15     Invaded        OW ESF_Microcystin 6.437536e-01   0.440000   0.3285191
## 16     Invaded        OW           TN_TP 4.676542e+01  31.182701   0.0000000
## 17     Invaded        OW           CA.SA 2.823056e+01  10.780370   4.4873502
## 18     Invaded        OW    Mean_Depth_m 6.233231e+00   6.200000   3.7000000
## 19     Invaded        OW      Percent_Ag 1.934462e+01  20.000000   1.0000000
## 20     Invaded        SB    Secchi_Depth 2.750000e+00   2.750000   2.7500000
## 21     Invaded        SB    ESF_FP_Chl.a 2.138201e+03 142.635000   0.2000000
## 22     Invaded        SB      ESF_FP_BGA 2.002831e+03  85.475000   0.0000000
## 23     Invaded        SB ESF_Microcystin 7.493377e+01  43.840108   0.8346926
## 24     Invaded        SB           CA.SA 1.424389e+01   4.807692   4.6535893
## 25     Invaded        SB    Mean_Depth_m 5.713333e+00   7.100000   3.7000000
## 26     Invaded        SB      Percent_Ag 3.256667e+01  21.500000  20.0000000
## 27   Uninvaded        BS    Secchi_Depth 5.100000e+00   5.100000   5.1000000
## 28   Uninvaded        BS              TP 3.269729e-02   0.012600   0.0005000
## 29   Uninvaded        BS      Temp_Water 1.499686e+01  16.000000   4.0000000
## 30   Uninvaded        BS              TN 3.730000e-01   0.373000   0.3730000
## 31   Uninvaded        BS      True_Color 1.700000e+01  17.000000  17.0000000
## 32   Uninvaded        BS Extracted_Chl.a 1.600000e+00   1.600000   1.6000000
## 33   Uninvaded        BS    ESF_FP_Chl.a 6.300000e-01   0.630000   0.6300000
## 34   Uninvaded        BS      ESF_FP_BGA 0.000000e+00   0.000000   0.0000000
## 35   Uninvaded        BS           TN_TP 8.197802e+00   8.197802   8.1978022
## 36   Uninvaded        BS           CA.SA 4.077164e+01  12.692967   6.5073529
## 37   Uninvaded        BS    Mean_Depth_m 8.546602e+00   8.500000   4.2000000
## 38   Uninvaded        BS      Percent_Ag 1.929450e+01  18.000000   0.0000000
## 39   Uninvaded        OW    Secchi_Depth 4.136219e+00   4.000000   1.0000000
## 40   Uninvaded        OW              TP 1.215861e-02   0.009600   0.0033000
## 41   Uninvaded        OW      Temp_Water 2.320556e+01  23.000000  13.0000000
## 42   Uninvaded        OW              TN 3.543052e-01   0.331000   0.1080000
## 43   Uninvaded        OW      True_Color 1.423788e+01  13.000000   0.5000000
## 44   Uninvaded        OW         Calcium 1.189190e+01   7.700000   1.5300000
## 45   Uninvaded        OW Extracted_Chl.a 3.309422e+00   2.300000   0.0500000
## 46   Uninvaded        OW    ESF_FP_Chl.a 2.736372e+00   1.610000   0.0000000
## 47   Uninvaded        OW      ESF_FP_BGA 1.112477e+00   0.110000   0.0000000
## 48   Uninvaded        OW ESF_Microcystin 5.172282e-01   0.420000   0.2044636
## 49   Uninvaded        OW           TN_TP 3.727631e+01  32.944785   5.2858958
## 50   Uninvaded        OW           CA.SA 4.595273e+01  17.211538   6.5073529
## 51   Uninvaded        OW    Mean_Depth_m 8.728443e+00   8.500000   4.2000000
## 52   Uninvaded        OW      Percent_Ag 1.678144e+01  18.000000   0.0000000
## 53   Uninvaded        SB    ESF_FP_Chl.a 1.560474e+03 632.615000   0.2800000
## 54   Uninvaded        SB      ESF_FP_BGA 9.629969e+02  31.275000   0.0000000
## 55   Uninvaded        SB ESF_Microcystin 2.760696e+02 276.069608 276.0696077
## 56   Uninvaded        SB           CA.SA 1.863356e+01  14.952253   6.5073529
## 57   Uninvaded        SB    Mean_Depth_m 6.887500e+00   7.550000   4.2000000
## 58   Uninvaded        SB      Percent_Ag 1.818750e+01  18.000000   1.0000000
##             Max           SD
## 1      0.374200 5.324365e-02
## 2     25.000000 5.298994e+00
## 3    189.855073 5.436477e+01
## 4      9.900000 1.727590e+00
## 5     48.000000 1.550414e+01
## 6      9.100000 1.395980e+00
## 7      0.066400 7.093941e-03
## 8     36.000000 2.860583e+00
## 9      4.124000 4.399775e-01
## 10    55.000000 9.245752e+00
## 11    57.800000 1.133176e+01
## 12    13.300000 1.839573e+00
## 13    12.150000 1.634573e+00
## 14    10.360000 9.835983e-01
## 15     1.840000 5.345261e-01
## 16   240.595238 4.110511e+01
## 17   189.855073 5.391221e+01
## 18     9.900000 1.697457e+00
## 19    48.000000 1.529962e+01
## 20     2.750000           NA
## 21 31507.500000 6.226951e+03
## 22 31507.500000 6.221888e+03
## 23   361.241045 9.244751e+01
## 24   189.855073 4.064373e+01
## 25     9.900000 1.901513e+00
## 26    48.000000 1.359881e+01
## 27     5.100000           NA
## 28     0.392400 5.827662e-02
## 29    29.000000 6.516206e+00
## 30     0.373000           NA
## 31    17.000000           NA
## 32     1.600000           NA
## 33     0.630000           NA
## 34     0.000000           NA
## 35     8.197802           NA
## 36   210.424710 6.629667e+01
## 37    21.300000 5.113367e+00
## 38    44.000000 1.692249e+01
## 39     8.700000 1.468001e+00
## 40     0.099200 1.067758e-02
## 41    33.000000 3.347532e+00
## 42     1.239000 1.536240e-01
## 43    59.000000 8.816877e+00
## 44    58.300000 1.279472e+01
## 45    24.600000 3.659556e+00
## 46    57.040000 5.021106e+00
## 47    31.020000 3.079598e+00
## 48     1.779509 3.929021e-01
## 49   112.636364 1.959529e+01
## 50   210.424710 7.014974e+01
## 51    21.300000 5.361541e+00
## 52    44.000000 1.561964e+01
## 53  8512.500000 2.245858e+03
## 54  8512.500000 2.253475e+03
## 55   276.069608           NA
## 56    31.521739 1.079730e+01
## 57     8.900000 1.949658e+00
## 58    41.000000 8.166752e+00
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(redVariableSummaries, "./output_data/redVariableSummaries-June2020.csv")
#melt Lake List data frame by Dreissenids so we can summarize all our variables of interest in one code chunk 
redmeltLakes <- melt(df_match[,c(1:4,6)], id=c("LakeID", "Dreissenids"), na.rm=TRUE)

#summarize each variable split by invasion status and info
redLakeSummaries<-ddply(redmeltLakes, c("Dreissenids", "variable"), summarize,
      Mean = mean(value, na.rm=TRUE), 
      Median = median(value, na.rm=TRUE), 
      Min = min(value, na.rm=TRUE), 
      Max = max(value, na.rm=TRUE), 
      SD = sd(value, na.rm=TRUE))
redLakeSummaries
##   Dreissenids     variable     Mean    Median      Min      Max        SD
## 1           0        CA.SA 40.38083 14.952253 7.473684 210.4247 69.170089
## 2           0 Mean_Depth_m  8.48750  7.300000 4.200000  21.3000  5.521500
## 3           0   Percent_Ag 17.25000 16.000000 0.000000  44.0000 17.556033
## 4           1        CA.SA 32.57583  8.643862 4.487350 189.8551 63.897069
## 5           1 Mean_Depth_m  6.32500  6.400000 3.700000   9.9000  1.918146
## 6           1   Percent_Ag 21.00000 20.000000 1.000000  48.0000 15.874508
#Write this dataframe to a csv so the values can be easily put into a word document 
write.csv(redLakeSummaries, "./output_data/redLakeSummaries.csv")

Histograms

First we will split our dataset based on information type

redOWCSLAP<-redCSLAP[redCSLAP$Info_Type == "OW",]
redBSCSLAP<-redCSLAP[redCSLAP$Info_Type == "BS",]
redSBCSLAP<-redCSLAP[redCSLAP$Info_Type == "SB",]

Open Water TP

ggplot(redOWCSLAP, aes(x=TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TP", x="TP (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 13 rows containing non-finite values (stat_bin).
## Warning: Removed 13 rows containing non-finite values (stat_density).

Open Water TN

ggplot(redOWCSLAP, aes(x=TN)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN", x="TN (mg/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 20 rows containing non-finite values (stat_bin).
## Warning: Removed 20 rows containing non-finite values (stat_density).

Open Water TN:TP

ggplot(redOWCSLAP, aes(x=TN_TP)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water TN:TP", x="TN:TP", y="Frequency")+
  scale_x_continuous(limits=c(0,100))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 67 rows containing non-finite values (stat_bin).
## Warning: Removed 67 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing missing values (geom_bar).

Open Water Chlorophyll-a

ggplot(redOWCSLAP, aes(x=Extracted_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open water chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 24 rows containing non-finite values (stat_bin).
## Warning: Removed 24 rows containing non-finite values (stat_density).

Secchi Depth

ggplot(redOWCSLAP, aes(x=Secchi_Depth)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Secchi Depth", x="Secchi depth (m)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 30 rows containing non-finite values (stat_bin).
## Warning: Removed 30 rows containing non-finite values (stat_density).

True Color

ggplot(redOWCSLAP, aes(x=True_Color)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="True Color", x="True color (PTU)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 21 rows containing non-finite values (stat_bin).
## Warning: Removed 21 rows containing non-finite values (stat_density).

Shoreline Bloom Chlorophyll

ggplot(redSBCSLAP, aes(x=ESF_FP_Chl.a)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Chlorophyll a", x="Chlorophyll a (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Shoreline Bloom Microcystin

ggplot(redSBCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Shoreline Bloom Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 55 rows containing non-finite values (stat_bin).
## Warning: Removed 55 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.

Shoreline Bloom Microcystin

ggplot(redOWCSLAP, aes(x=ESF_Microcystin)) +
  geom_histogram(position="identity", color="black", fill="white")+
  facet_wrap(~Dreissenids)+ 
  geom_density()+
  theme_minimal()+
  labs(title="Open Water Microcystin", x="Microcystin (ug/L)", y="Frequency")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 633 rows containing non-finite values (stat_bin).
## Warning: Removed 633 rows containing non-finite values (stat_density).

Historical Analyses for Invaded Lakes

Secchi Depth

Because of the way the data were given to me, Secchi depth has to be done first on its own

hisCSLAP<-read.csv("./historical_data/Historical_CSLAP.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#extract sample year and month 
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

#read in invasion year information 
Invasion_Year<-read.csv("./raw_data/InvasionYear.csv", header=TRUE, colClasses=c("factor", "numeric"))

#merge with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)

#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year

#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)

#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)
#Aggregating yearly means by lake and years invaded
Secchi<-aggregate(Secchi_Depth_m ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)

Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP, FUN=mean)
#t-test pre and post invasion and MEMs
hist(hisCSLAP$Secchi_Depth_m)

t.test(Secchi_Depth_m~Dreissenids, data=hisCSLAP)
## 
##  Welch Two Sample t-test
## 
## data:  Secchi_Depth_m by Dreissenids
## t = 10.37, df = 1054.5, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.6735570 0.9879379
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                4.672985                3.842238
Secchi.lmer<-lmer(Secchi_Depth_m ~ Dreissenids + (1|LakeID), data=hisCSLAP)

summary(Secchi.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Secchi_Depth_m ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP
## 
## REML criterion at convergence: 4388.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9324 -0.6854 -0.0512  0.6419  3.5186 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.4754   0.6895  
##  Residual             1.5707   1.2533  
## Number of obs: 1324, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)             4.62638    0.25053    7.48842  18.466 1.62e-07 ***
## DreissenidsUninvaded   -0.70457    0.07522 1320.02303  -9.367  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.175
plot(Secchi.lmer)

ggplot(hisCSLAP, aes(x=Dreissenids, y=Secchi_Depth_m))+
  geom_boxplot()+
  theme_minimal()+
  labs(y = "Secchi Depth (m)")
## Warning: Removed 779 rows containing non-finite values (stat_boxplot).

#ggsave("historical_Secchi.png")

Data Read In For the Rest of the Variables

#historicalCSLAP df edited to match DeRuyter and EatonBrook dfs
hisCSLAP<-read.csv("./historical_data/Historical_CSLAP_edited.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#DeRuyter and EatonBrook dfs
Eaton<-read.csv("./historical_data/EatonHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))
Der<-read.csv("./historical_data/DeruyterHistoricalClean2.csv", na.strings=c("", " ", "ND", "NA", "no filter", "nofilter"))

#change levels of Info_Type 
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "ow"]<-"OW"
hisCSLAP$Info_Type[hisCSLAP$Info_Type == "sb"]<-"SB"
hisCSLAP$Info_Type<-as.factor(hisCSLAP$Info_Type)
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)
#Merge hisCSLAP with Deruyter and Eatonbrook 

#remove any observations from Eaton or DeRuyter from hisCSLAP 
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "De Ruyter Reservoir",]
hisCSLAP<-hisCSLAP[hisCSLAP$Lake_Name != "Eaton Brook Reservoir",]

#row bind cleaned DeRuyter and EatonBrook 
hisCSLAP<-rbind(hisCSLAP, Eaton)
hisCSLAP<-rbind(hisCSLAP, Der)

Further organize hisCSLAP, and merge physical characteristics and invasion status

#extract sample year and month 
hisCSLAP$Sample_Year<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%Y"))
hisCSLAP$Sample_Month<-as.numeric(format(as.Date(hisCSLAP$Sample_Date, format="%m/%d/%Y"),"%m"))

#merge Invasion Year with hisCSLAP
hisCSLAP<-merge(hisCSLAP, Invasion_Year, all.x=TRUE)

#create new column `Years_Invaded` by subtracting `Invasion Year` from `Sample Year`
hisCSLAP$Years_Invaded<-hisCSLAP$Sample_Year-hisCSLAP$Invasion_Year

#turn `Sample_Year`, `Sample_Month`, and `Invasion_Year` back to factors
hisCSLAP$Sample_Year<-as.factor(hisCSLAP$Sample_Year)
hisCSLAP$Sample_Month<-as.factor(hisCSLAP$Sample_Month)
hisCSLAP$Invasion_Year<-as.factor(hisCSLAP$Invasion_Year)

#create column with 'Invaded' or "Uninvaded" based on `Years Invaded`
hisCSLAP$Dreissenids<-ifelse(hisCSLAP$Years_Invaded >= 0, "Invaded", "Uninvaded")
hisCSLAP$Dreissenids<-as.factor(hisCSLAP$Dreissenids)

#we will also remove and shoreline bloom samples 
hisCSLAP<-hisCSLAP[hisCSLAP$Info_Type != "SB",]
hisCSLAP$Info_Type<-droplevels(hisCSLAP$Info_Type)

#create a TN:TP column 
hisCSLAP$TN_TP<-hisCSLAP$TN_mg.L/hisCSLAP$TP_mg.L

#finally, create a df with only OW samples and a df with only BS samples 
OWhisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "OW",]
BShisCSLAP<-hisCSLAP[hisCSLAP$Info_Type == "BS",]

Aggregating yearly means by lake and years invaded

Chl<-aggregate(Extracted_Chl_ug.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

OWTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

BSTP<-aggregate(TP_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "BS",], FUN=mean)

OWTN<-aggregate(TN_mg.L ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

OWTN_TP<-aggregate(TN_TP ~ Lake_Name + Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

TC<-aggregate(True_Color_PTU ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

Calc<-aggregate(Calcium_mg.L ~ Lake_Name + Years_Invaded, data=hisCSLAP[hisCSLAP$Info_Type == "OW",], FUN=mean)

How do lakes change after invasion? (t-test for pre and post invasion, then MEMs)

Extracted Chlorophyll-a

hist(log(OWhisCSLAP$Extracted_Chl_ug.L))

t.test(Extracted_Chl_ug.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  Extracted_Chl_ug.L by Dreissenids
## t = -12.443, df = 1320.3, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.624423 -1.909601
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                2.492524                4.759536
Chl.lmer<-lmer(log(Extracted_Chl_ug.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(Chl.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl_ug.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 3050.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0999 -0.5200 -0.0071  0.5971  4.1661 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.04857  0.2204  
##  Residual             0.57543  0.7586  
## Number of obs: 1323, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)          6.034e-01  8.519e-02 7.636e+00   7.083  0.00013 ***
## DreissenidsUninvaded 6.015e-01  4.518e-02 1.318e+03  13.312  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.306
plot(Chl.lmer)

Open Water TP

hist(log(OWhisCSLAP$TP_mg.L))

t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TP_mg.L by Dreissenids
## t = 3.1251, df = 760.3, p-value = 0.001845
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0003861491 0.0016908695
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##              0.01197581              0.01093730
OWTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids  + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 868.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.0407 -0.6192 -0.0512  0.5392  6.2139 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.03243  0.1801  
##  Residual             0.11009  0.3318  
## Number of obs: 1315, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            -4.48194    0.06539    7.42433 -68.539 1.13e-11 ***
## DreissenidsUninvaded   -0.11493    0.01972 1310.85322  -5.827 7.10e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.171
plot(OWTP.lmer)

Bottom Sample TP

hist(log(BShisCSLAP$TP_mg.L))

t.test(TP_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "BS",])
## 
##  Welch Two Sample t-test
## 
## data:  TP_mg.L by Dreissenids
## t = -2.6076, df = 243.03, p-value = 0.009682
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.08319781 -0.01159341
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##              0.04436469              0.09176031
BSTP.lmer<-lmer(log(TP_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "BS",])

summary(BSTP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "BS", ]
## 
## REML criterion at convergence: 1778.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4685 -0.5772 -0.1504  0.5627  4.1494 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.4817   0.6941  
##  Residual             0.8566   0.9255  
## Number of obs: 651, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)           -3.78692    0.24993   7.12997 -15.152  1.1e-06 ***
## DreissenidsUninvaded   0.33838    0.08942 647.96512   3.784 0.000169 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.106
plot(BSTP.lmer)

Open Water TN

hist(log(OWhisCSLAP$TN_mg.L))

t.test(TN_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TN_mg.L by Dreissenids
## t = -0.82352, df = 240.57, p-value = 0.411
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.12748917  0.05231878
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##               0.4895265               0.5271117
OWTN.lmer<-lmer(log(TN_mg.L) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTN.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_mg.L) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 880.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.3884 -0.5927 -0.0039  0.5499  5.6278 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.2847   0.5336  
##  Residual             0.2084   0.4565  
## Number of obs: 661, groups:  LakeID, 8
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)   
## (Intercept)           -0.855286   0.189889   7.030046  -4.504  0.00275 **
## DreissenidsUninvaded   0.004495   0.048195 655.931431   0.093  0.92572   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.060

Open Water TN:TP

hist(log(OWhisCSLAP$TN_TP))

t.test(TN_TP ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  TN_TP by Dreissenids
## t = -0.65903, df = 251.56, p-value = 0.5105
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -10.553469   5.261396
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                45.67168                48.31771
OWTN_TP.lmer<-lmer(log(TN_TP) ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(OWTN_TP.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP) ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 1073.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2261 -0.5569 -0.0289  0.5560  4.2435 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.2491   0.4991  
##  Residual             0.2877   0.5364  
## Number of obs: 651, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            3.62840    0.17831   7.05445   20.35 1.59e-07 ***
## DreissenidsUninvaded   0.06740    0.05665 647.39946    1.19    0.235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.075
plot(OWTN_TP.lmer)

Open Water True Color

t.test(True_Color_PTU ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  True_Color_PTU by Dreissenids
## t = 11.754, df = 1008.7, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  5.747462 8.051237
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##               14.263158                7.363808
TC.lmer<-lmer(True_Color_PTU ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(TC.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color_PTU ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 9688.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3027 -0.5143 -0.1901  0.2417  7.1496 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 44.38    6.662   
##  Residual             85.84    9.265   
## Number of obs: 1325, groups:  LakeID, 8
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            15.1934     2.3925    7.0424   6.351 0.000376 ***
## DreissenidsUninvaded   -5.8055     0.5509 1319.0393 -10.537  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.132

Calcium

t.test(Calcium_mg.L ~ Dreissenids, data=hisCSLAP[hisCSLAP$Info_Type == "OW",])
## 
##  Welch Two Sample t-test
## 
## data:  Calcium_mg.L by Dreissenids
## t = -0.034601, df = 64.648, p-value = 0.9725
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.422633  4.272010
## sample estimates:
##   mean in group Invaded mean in group Uninvaded 
##                30.53299                30.60830
Calc.lmer<-lmer(Calcium_mg.L ~ Dreissenids + (1|LakeID), data=hisCSLAP[hisCSLAP$Info_Type == "OW",])

summary(Calc.lmer)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Calcium_mg.L ~ Dreissenids + (1 | LakeID)
##    Data: hisCSLAP[hisCSLAP$Info_Type == "OW", ]
## 
## REML criterion at convergence: 858.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7693 -0.3958 -0.0114  0.3219  3.5680 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 88.02    9.382   
##  Residual             43.69    6.610   
## Number of obs: 127, groups:  LakeID, 8
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            31.031      3.414   7.283   9.090 3.13e-05 ***
## DreissenidsUninvaded    2.383      1.637 122.621   1.455    0.148    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.149
plot(Calc.lmer)

Boxplots

Chlorophyll-a

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Extracted_Chl_ug.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Extracted Chlorophyll-a (mg/L)")
## Warning: Removed 54 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_Chl.png")

OWTP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TP_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TP (mg/L)")
## Warning: Removed 62 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTP.png")

BSTP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "BS",], aes(x=Dreissenids, y=TP_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Bottom Sample TP (mg/L)")
## Warning: Removed 228 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_BSTP.png")

OWTN

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TN (mg/L)")
## Warning: Removed 716 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTN.png")

OWTN_TP

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=TN_TP)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water TN:TP")
## Warning: Removed 726 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTN_TP.png")

OW TC

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=True_Color_PTU)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water True Color")
## Warning: Removed 52 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWTC.png")

OW Calcium

ggplot(hisCSLAP[hisCSLAP$Info_Type == "OW",], aes(x=Dreissenids, y=Calcium_mg.L)) + 
  geom_boxplot()+
  theme_bw()+
  labs(y = "Open Water Calcium")
## Warning: Removed 1250 rows containing non-finite values (stat_boxplot).

#ggsave("./output_figures/historical_OWCalcium.png")

Mixed Effects Models

Global Dataset

Open water TN:TP

TNTP<-lmer(log(TN_TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(TNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN_TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 4288.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -14.8325  -0.4551   0.0402   0.4806   9.2136 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.052851 0.22989 
##  LakeID             (Intercept) 0.058346 0.24155 
##  Sample_Month       (Intercept) 0.001713 0.04139 
##  Sample_Year        (Intercept) 0.009578 0.09787 
##  Residual                       0.255404 0.50538 
## Number of obs: 2601, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)           3.183775   0.165460 79.439722  19.242   <2e-16 ***
## DreissenidsUninvaded -0.032480   0.105180 92.981532  -0.309   0.7582    
## log(CA.SA)            0.048601   0.033665 61.368921   1.444   0.1539    
## log(Mean_Depth_m)     0.125403   0.052248 63.715137   2.400   0.0193 *  
## Percent_Ag            0.001651   0.002880 66.035094   0.573   0.5683    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.673                     
## log(CA.SA)  -0.458  0.044              
## lg(Mn_Dpt_) -0.520  0.087 -0.040       
## Percent_Ag  -0.279  0.326 -0.044 -0.053
plot(TNTP)

qqPlot(resid(TNTP))

## 1395 4458 
##  711 2353

Bottom Sample TP

BSTP<-lmer(log(TP+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=BSCSLAP)

summary(BSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TP + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: BSCSLAP
## 
## REML criterion at convergence: 3060.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.4588 -0.4356 -0.0638  0.3495  4.9671 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.081919 0.28621 
##  LakeID             (Intercept) 0.387229 0.62228 
##  Sample_Month       (Intercept) 0.007195 0.08482 
##  Sample_Year        (Intercept) 0.003081 0.05551 
##  Residual                       0.243055 0.49301 
## Number of obs: 1808, groups:  
## Sample_Year:LakeID, 269; LakeID, 53; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           -3.550746   0.448242  60.210119  -7.921 6.52e-11 ***
## DreissenidsUninvaded   0.246827   0.206337 134.468860   1.196  0.23371    
## log(CA.SA)            -0.013752   0.090666  48.401520  -0.152  0.88007    
## log(Mean_Depth_m)     -0.145212   0.203496  48.540796  -0.714  0.47890    
## Percent_Ag             0.022462   0.006911  50.849848   3.250  0.00205 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.478                     
## log(CA.SA)  -0.244  0.063              
## lg(Mn_Dpt_) -0.740  0.022 -0.252       
## Percent_Ag  -0.346  0.256 -0.116  0.177
plot(BSTP)

qqPlot(resid(BSTP))

## 3945  946 
## 1445  355

Open water TN

Full model error: Model failed to converge

-Remove Sample_Month. Sample_Year, Sample_Year:LakeID (low variance) to resolve error

TN<-lmer(log(TN+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +   (1|LakeID), data=OWCSLAP)

summary(TN)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(TN + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 2964
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.3622 -0.5006  0.0011  0.4902  8.9046 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 0.06457  0.2541  
##  Residual             0.16644  0.4080  
## Number of obs: 2638, groups:  LakeID, 67
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           -0.977263   0.140314  91.572863  -6.965 4.89e-10 ***
## DreissenidsUninvaded  -0.075003   0.083028 198.446052  -0.903  0.36744    
## log(CA.SA)             0.047215   0.032257  62.385805   1.464  0.14828    
## log(Mean_Depth_m)     -0.137653   0.049696  63.297223  -2.770  0.00735 ** 
## Percent_Ag             0.012407   0.002696  67.534806   4.602 1.90e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628                     
## log(CA.SA)  -0.512  0.042              
## lg(Mn_Dpt_) -0.568  0.071 -0.034       
## Percent_Ag  -0.238  0.276 -0.053 -0.063
plot(TN)

qqPlot(resid(TN))

## 3666 1395 
## 1966  711

Open water chlorophyll

Chl<-lmer(log(Extracted_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(Chl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(Extracted_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 5949.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.5567 -0.4517  0.0417  0.5127  4.1829 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.05400  0.2324  
##  LakeID             (Intercept) 0.21540  0.4641  
##  Sample_Month       (Intercept) 0.02576  0.1605  
##  Sample_Year        (Intercept) 0.01314  0.1146  
##  Residual                       0.49917  0.7065  
## Number of obs: 2591, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            1.355434   0.282568  85.411626   4.797 6.77e-06 ***
## DreissenidsUninvaded  -0.146767   0.167536 127.257977  -0.876  0.38266    
## log(CA.SA)             0.037582   0.060147  56.206917   0.625  0.53461    
## log(Mean_Depth_m)     -0.295017   0.092811  57.298075  -3.179  0.00239 ** 
## Percent_Ag             0.002679   0.005059  60.535425   0.529  0.59842    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.628                     
## log(CA.SA)  -0.476  0.042              
## lg(Mn_Dpt_) -0.533  0.078 -0.033       
## Percent_Ag  -0.244  0.294 -0.052 -0.061
plot(Chl)

qqPlot(resid(Chl))

## 1989 3530 
## 1003 1856

Secchi

Secchi<-lmer(log(Secchi_Depth+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(Secchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(Secchi_Depth + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: -311.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.9201 -0.5351  0.0207  0.5662  4.7949 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 1.150e-02 0.107227
##  LakeID             (Intercept) 7.080e-02 0.266084
##  Sample_Month       (Intercept) 4.551e-05 0.006746
##  Sample_Year        (Intercept) 8.639e-04 0.029393
##  Residual                       4.079e-02 0.201956
## Number of obs: 2590, groups:  
## Sample_Year:LakeID, 342; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)            1.063472   0.140820  95.144934   7.552 2.60e-11 ***
## DreissenidsUninvaded  -0.091870   0.078020 230.031893  -1.178   0.2402    
## log(CA.SA)            -0.071893   0.033512  62.546322  -2.145   0.0358 *  
## log(Mean_Depth_m)      0.336271   0.051584  63.305400   6.519 1.34e-08 ***
## Percent_Ag            -0.002311   0.002784  68.034599  -0.830   0.4093    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.586                     
## log(CA.SA)  -0.523  0.034              
## lg(Mn_Dpt_) -0.579  0.064 -0.033       
## Percent_Ag  -0.212  0.250 -0.059 -0.068
plot(Secchi)

qqPlot(resid(Secchi))

## 4582 4641 
## 2411 2440

Open Water True Color

TC<-lmer(log(True_Color+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(TC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(True_Color + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | Sample_Year) + (1 | Sample_Month) + (1 |  
##     LakeID) + (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 2860.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -9.0242 -0.4258  0.0510  0.5180  3.4438 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.06792  0.2606  
##  LakeID             (Intercept) 0.18192  0.4265  
##  Sample_Month       (Intercept) 0.01635  0.1279  
##  Sample_Year        (Intercept) 0.07901  0.2811  
##  Residual                       0.13060  0.3614  
## Number of obs: 2612, groups:  
## Sample_Year:LakeID, 343; LakeID, 67; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                        Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)           2.179e+00  2.738e-01  7.195e+01   7.957 1.85e-11 ***
## DreissenidsUninvaded  3.631e-01  1.480e-01  1.507e+02   2.453  0.01532 *  
## log(CA.SA)            1.552e-01  5.483e-02  6.103e+01   2.830  0.00629 ** 
## log(Mean_Depth_m)    -2.406e-01  8.455e-02  6.207e+01  -2.846  0.00599 ** 
## Percent_Ag           -2.579e-04  4.599e-03  6.587e+01  -0.056  0.95545    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.572                     
## log(CA.SA)  -0.446  0.039              
## lg(Mn_Dpt_) -0.498  0.074 -0.033       
## Percent_Ag  -0.218  0.285 -0.054 -0.063
plot(TC)

qqPlot(resid(TC))

## 3656 4774 
## 1940 2531

Shoreline Bloom Chlorophyll a

Full model error: boundary (singular) fit and model failed to converge

Remove Sample_Year, Sample_Month, and Sample_Year:LakeID (low variance) to resolve

SBChl<-lmer(log(ESF_FP_Chl.a+.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) + Percent_Ag  + (1|LakeID), data=SBCSLAP)

summary(SBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_FP_Chl.a + 0.01) ~ Dreissenids + log(CA.SA) + log(Mean_Depth_m) +  
##     Percent_Ag + (1 | LakeID)
##    Data: SBCSLAP
## 
## REML criterion at convergence: 990.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.96703 -0.56007  0.06596  0.62799  2.26269 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  LakeID   (Intercept) 2.293    1.514   
##  Residual             4.428    2.104   
## Number of obs: 219, groups:  LakeID, 42
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)   
## (Intercept)           5.12907    1.56564 35.94401   3.276  0.00234 **
## DreissenidsUninvaded -0.21254    0.98272 46.03089  -0.216  0.82973   
## log(CA.SA)            0.26531    0.33747 35.26516   0.786  0.43701   
## log(Mean_Depth_m)    -0.42431    0.50696 28.67188  -0.837  0.40953   
## Percent_Ag            0.03699    0.02638 35.94149   1.402  0.16953   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU l(CA.S l(M_D_
## DrssndsUnnv -0.710                     
## log(CA.SA)  -0.503  0.070              
## lg(Mn_Dpt_) -0.544  0.071  0.058       
## Percent_Ag  -0.392  0.501 -0.077 -0.090
plot(SBChl)

qqPlot(resid(SBChl))

##  614 4310 
##   44  195

Shoreline Bloom Microcystin

We want to add TN:TP as a predictor (fixed effect), but these variables aren’t collected with shoreline bloom data. So we will instead use the yearly average for a lake.

#Extracting average annual TN_TP for each lake 
library(plyr)
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))
colnames(avgTNTP)[colnames(avgTNTP)=="Mean"] <- "TN_TP"

#Merge these values to the SBCSLAP df 
SBCSLAP<-SBCSLAP[,c(1:49, 51:54)]
SBCSLAP<-merge(SBCSLAP, avgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

Full model error: boundary (singular) fit

Remove Sample_Month and Sample_Year:LakeID to resolve

SBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m + Percent_Ag  + (1|Sample_Year) + (1|LakeID), data=SBCSLAP)

summary(SBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m +  
##     Percent_Ag + (1 | Sample_Year) + (1 | LakeID)
##    Data: SBCSLAP
## 
## REML criterion at convergence: 176.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.28513 -0.53162  0.06219  0.53296  1.59305 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  LakeID      (Intercept) 1.846    1.359   
##  Sample_Year (Intercept) 5.793    2.407   
##  Residual                2.281    1.510   
## Number of obs: 43, groups:  LakeID, 7; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)          16.56817    6.64246  1.83807   2.494    0.141
## DreissenidsUninvaded -1.70297    2.24234  0.83298  -0.759    0.606
## TN_TP                -0.02691    0.02376  5.71424  -1.133    0.303
## log(CA.SA)           -3.02465    2.17918  1.75354  -1.388    0.315
## Mean_Depth_m         -0.66268    0.44983  1.60063  -1.473    0.307
## Percent_Ag           -0.06315    0.06490  1.31865  -0.973    0.475
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU TN_TP  l(CA.S Mn_Dp_
## DrssndsUnnv -0.285                            
## TN_TP       -0.031 -0.049                     
## log(CA.SA)  -0.850 -0.154 -0.040              
## Mean_Dpth_m -0.745  0.169 -0.283  0.583       
## Percent_Ag  -0.694  0.596 -0.245  0.415  0.506
plot(SBmicro)

qqPlot(resid(SBmicro))

## 60 36 
## 23 16

Open Water Microcystin

OWmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m + Percent_Ag  + (1|Sample_Year) + (1|LakeID) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=OWCSLAP)

summary(OWmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m +  
##     Percent_Ag + (1 | Sample_Year) + (1 | LakeID) + (1 | Sample_Month) +  
##     (1 | Sample_Year:LakeID)
##    Data: OWCSLAP
## 
## REML criterion at convergence: 198.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3744 -0.4953 -0.1004  0.3330  2.6445 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.14350  0.37882 
##  LakeID             (Intercept) 0.00265  0.05148 
##  Sample_Month       (Intercept) 0.02637  0.16239 
##  Sample_Year        (Intercept) 5.31141  2.30465 
##  Residual                       0.15525  0.39402 
## Number of obs: 99, groups:  
## Sample_Year:LakeID, 68; LakeID, 50; Sample_Month, 5; Sample_Year, 5
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)           0.249822   1.093728  4.539615   0.228   0.8293  
## DreissenidsUninvaded  0.101416   0.222614 38.515454   0.456   0.6513  
## TN_TP                -0.006788   0.003091 74.105964  -2.196   0.0312 *
## log(CA.SA)           -0.128848   0.082681 42.715034  -1.558   0.1265  
## Mean_Depth_m         -0.023865   0.023937 34.741718  -0.997   0.3257  
## Percent_Ag            0.010934   0.006191 39.154276   1.766   0.0852 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU TN_TP  l(CA.S Mn_Dp_
## DrssndsUnnv -0.193                            
## TN_TP       -0.088 -0.013                     
## log(CA.SA)  -0.130 -0.178  0.111              
## Mean_Dpth_m -0.124  0.144 -0.078  0.092       
## Percent_Ag  -0.083  0.504 -0.165 -0.286  0.004
plot(OWmicro)

qqPlot(resid(OWmicro))

## 2537  115 
##   63    2

Reduced Dataset

Open water TN:TP

redTNTP<-lmer(log(TN_TP+.01) ~ Dreissenids +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redTNTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(TN_TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +  
##     (1 | LakeID) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 1053
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -14.6301  -0.3793   0.0670   0.4569   4.2824 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 5.119e-02 0.226251
##  LakeID             (Intercept) 1.933e-01 0.439666
##  Sample_Month       (Intercept) 4.381e-05 0.006619
##  Sample_Year        (Intercept) 3.952e-03 0.062861
##  Residual                       2.563e-01 0.506224
## Number of obs: 631, groups:  
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           3.57036    0.14745 22.47365   24.21   <2e-16 ***
## DreissenidsUninvaded -0.09378    0.17050 47.13269   -0.55    0.585    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.604
plot(redTNTP)

qqPlot(resid(redTNTP))

## 359 866 
## 152 401

Bottom Sample TP

redBSTP<-lmer(log(TP+.01) ~ Dreissenids  +  (1|Sample_Year) + (1|Sample_Month) + (1|LakeID) + (1|Sample_Year:LakeID), data=redBSCSLAP)

summary(redBSTP)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(TP + 0.01) ~ Dreissenids + (1 | Sample_Year) + (1 | Sample_Month) +  
##     (1 | LakeID) + (1 | Sample_Year:LakeID)
##    Data: redBSCSLAP
## 
## REML criterion at convergence: 941.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9133 -0.5193 -0.0992  0.3425  4.2140 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.08718  0.29527 
##  LakeID             (Intercept) 0.19656  0.44335 
##  Sample_Month       (Intercept) 0.01368  0.11695 
##  Sample_Year        (Intercept) 0.00338  0.05813 
##  Residual                       0.23610  0.48590 
## Number of obs: 567, groups:  
## Sample_Year:LakeID, 86; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)          -3.41111    0.16546 21.79854 -20.616 8.73e-16 ***
## DreissenidsUninvaded -0.06031    0.19384 32.13280  -0.311    0.758    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.609
plot(redBSTP)

qqPlot(resid(redBSTP))

## 930 870 
## 393 367

Open Water Chlorophyll-a

redChl<-lmer(log(Extracted_Chl.a+1) ~ Dreissenids + (1|LakeID) + (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Extracted_Chl.a + 1) ~ Dreissenids + (1 | LakeID) + (1 |  
##     Sample_Year) + (1 | Sample_Month) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 888.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6791 -0.6123 -0.0104  0.5567  3.7645 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.0245791 0.1568  
##  LakeID             (Intercept) 0.0623684 0.2497  
##  Sample_Month       (Intercept) 0.0110021 0.1049  
##  Sample_Year        (Intercept) 0.0009305 0.0305  
##  Residual                       0.2031526 0.4507  
## Number of obs: 635, groups:  
## Sample_Year:LakeID, 83; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           1.21908    0.10166 18.65722  11.991 3.29e-10 ***
## DreissenidsUninvaded -0.03321    0.11175 28.88199  -0.297    0.768    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.573
plot(redChl)

qqPlot(resid(redChl))

## 433 161 
## 199  69

Open Water Secchi Depth

-Sample_Year and Sample_Month removed for having low variance

redSecchi <- lmer(log(Secchi_Depth) ~ Dreissenids  + (1|LakeID)+ (1|Sample_Year) + (1|Sample_Month) + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redSecchi)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(Secchi_Depth) ~ Dreissenids + (1 | LakeID) + (1 | Sample_Year) +  
##     (1 | Sample_Month) + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 151
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0631 -0.5199 -0.0037  0.5757  3.9560 
## 
## Random effects:
##  Groups             Name        Variance  Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.0221217 0.14873 
##  LakeID             (Intercept) 0.0470875 0.21700 
##  Sample_Month       (Intercept) 0.0002708 0.01646 
##  Sample_Year        (Intercept) 0.0005295 0.02301 
##  Residual                       0.0582462 0.24134 
## Number of obs: 629, groups:  
## Sample_Year:LakeID, 82; LakeID, 16; Sample_Month, 6; Sample_Year, 6
## 
## Fixed effects:
##                      Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)           1.44876    0.07569 21.04598  19.140 8.61e-15 ***
## DreissenidsUninvaded -0.08699    0.09117 39.48953  -0.954    0.346    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.627
plot(redSecchi)

qqPlot(resid(redSecchi))

##  959 1261 
##  439  595

Open Water True Color

Full model error: Model failed to converge

Remove Sample_Year and Sample_Month (low variance) to resolve

redOWTC<-lmer(True_Color ~ Dreissenids  + (1|LakeID) + + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redOWTC)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: True_Color ~ Dreissenids + (1 | LakeID) + +(1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 3965.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.2594 -0.4913 -0.0763  0.4095  5.1558 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 49.79    7.056   
##  LakeID             (Intercept) 18.05    4.249   
##  Residual                       19.51    4.417   
## Number of obs: 638, groups:  Sample_Year:LakeID, 83; LakeID, 16
## 
## Fixed effects:
##                      Estimate Std. Error     df t value Pr(>|t|)    
## (Intercept)            12.204      1.860 15.369   6.562 7.98e-06 ***
## DreissenidsUninvaded    2.918      2.497 19.403   1.168    0.257    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.699
plot(redOWTC)

qqPlot(resid(redOWTC))

## 318 311 
## 140 135

Shoreline Bloom Chlorophyll a

-Sample_Year removed for low variance

redSBChl<-lmer(log(ESF_FP_Chl.a) ~ Dreissenids + (1|Sample_Month) + (1|LakeID) , data=redSBCSLAP)

summary(redSBChl)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_FP_Chl.a) ~ Dreissenids + (1 | Sample_Month) + (1 | LakeID)
##    Data: redSBCSLAP
## 
## REML criterion at convergence: 342.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.49435 -0.52241  0.07225  0.49418  1.98859 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  LakeID       (Intercept) 2.057    1.434   
##  Sample_Month (Intercept) 1.822    1.350   
##  Residual                 4.384    2.094   
## Number of obs: 76, groups:  LakeID, 10; Sample_Month, 6
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)            5.9401     0.9878  7.9946   6.014 0.000319 ***
## DreissenidsUninvaded  -0.2759     1.0549 12.7054  -0.262 0.797879    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.534
plot(redSBChl)

qqPlot(resid(redSBChl))

## 777 319 
##  58  26

Shoreline Bloom Microcystin

Same as for the global dataset, we need to use average yearly TN:TP values

#Extracting average annual TP for each lake 
library(plyr)
redavgTNTP<-ddply(redOWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))
colnames(redavgTNTP)[colnames(redavgTNTP)=="Mean"] <- "TN_TP"

#Merge these values to the SBCSLAP df 
redSBCSLAP<-redSBCSLAP[,c(1:49, 51:54)]
redSBCSLAP<-merge(redSBCSLAP, redavgTNTP, by=c("LakeID", "Sample_Year"), all.x=TRUE, all.y=FALSE)

Full model error: failed to converge

Remove Sample_Month, LakeID, and Sample_Year:LakeID to resolve

redSBmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + log(TN_TP)  + (1|Sample_Year)  , data=redSBCSLAP)

summary(redSBmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(ESF_Microcystin) ~ Dreissenids + log(TN_TP) + (1 | Sample_Year)
##    Data: redSBCSLAP
## 
## REML criterion at convergence: 65.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.05259 -0.49531  0.08973  0.52344  1.73701 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  Sample_Year (Intercept) 0.6936   0.8328  
##  Residual                1.8834   1.3724  
## Number of obs: 20, groups:  Sample_Year, 5
## 
## Fixed effects:
##                      Estimate Std. Error      df t value Pr(>|t|)
## (Intercept)            6.4301     3.9041  1.6197   1.647    0.269
## DreissenidsUninvaded   1.8504     1.7803  4.7588   1.039    0.349
## log(TN_TP)            -0.7165     0.8957  1.5288  -0.800    0.529
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU
## DrssndsUnnv -0.343       
## log(TN_TP)  -0.990  0.303
plot(redSBmicro)

qqPlot(resid(redSBmicro))

## 25 19 
## 15 13

Open Water Microcystin

redOWmicro<-lmer(log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m + Percent_Ag  + (1|Sample_Year:LakeID), data=redOWCSLAP)

summary(redOWmicro)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## log(ESF_Microcystin) ~ Dreissenids + TN_TP + log(CA.SA) + Mean_Depth_m +  
##     Percent_Ag + (1 | Sample_Year:LakeID)
##    Data: redOWCSLAP
## 
## REML criterion at convergence: 51.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.95073 -0.50281 -0.02831  0.23061  2.81406 
## 
## Random effects:
##  Groups             Name        Variance Std.Dev.
##  Sample_Year:LakeID (Intercept) 0.06954  0.2637  
##  Residual                       0.12233  0.3498  
## Number of obs: 26, groups:  Sample_Year:LakeID, 17
## 
## Fixed effects:
##                       Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)          -0.141309   0.451400 15.547712  -0.313   0.7584  
## DreissenidsUninvaded -0.204587   0.220092 14.474357  -0.930   0.3678  
## TN_TP                -0.009539   0.004698 17.412334  -2.031   0.0579 .
## log(CA.SA)           -0.010593   0.143050 16.406507  -0.074   0.9419  
## Mean_Depth_m         -0.078616   0.057758 13.410642  -1.361   0.1959  
## Percent_Ag            0.015348   0.006687 15.981507   2.295   0.0356 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) DrssnU TN_TP  l(CA.S Mn_Dp_
## DrssndsUnnv -0.308                            
## TN_TP       -0.134  0.314                     
## log(CA.SA)  -0.209 -0.401 -0.520              
## Mean_Dpth_m -0.695  0.282  0.228 -0.374       
## Percent_Ag  -0.453  0.130 -0.289  0.114  0.169
plot(redOWmicro)

qqPlot(resid(redOWmicro))

##  225 1270 
##    7   26

HABs Frequency

#Global Dataset

#create column based on 25 ug/L BGA guideline (from NYS DEC)
CSLAP$Bloom <- ifelse(CSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
CSLAP$Bloom <- as.factor(CSLAP$Bloom)

#Separate based on invasion status and drop unused levels of Lake Name
I.CSLAP<-CSLAP[CSLAP$Dreissenids == "Invaded",]
I.CSLAP$Lake_Name<-droplevels(I.CSLAP$Lake_Name)

U.CSLAP<-CSLAP[CSLAP$Dreissenids == "Uninvaded",]
U.CSLAP$Lake_Name<-droplevels(U.CSLAP$Lake_Name)

#Reduced Dataset

#create column based on 25 ug/L BGA guideline (from NYS DEC)
redCSLAP$Bloom <- ifelse(redCSLAP$ESF_FP_BGA >=25.0, "Bloom", "No Bloom")
redCSLAP$Bloom <- as.factor(redCSLAP$Bloom)

#Separate based on invasion status and drop unused levels of Lake Name
I.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Invaded",]
I.redCSLAP$Lake_Name<-droplevels(I.redCSLAP$Lake_Name)

U.redCSLAP<-redCSLAP[redCSLAP$Dreissenids == "Uninvaded",]
U.redCSLAP$Lake_Name<-droplevels(U.redCSLAP$Lake_Name)

Global Dataset: All Blooms

table(CSLAP[, c("Bloom", "Dreissenids")])
##           Dreissenids
## Bloom      Invaded Uninvaded
##   Bloom         42        98
##   No Bloom     326      2355
HABFreqYear<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Sample_Year")]))

HABFreqLake<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Lake_Name")]))

HABFreq<-as.data.frame(table(CSLAP[,c("Bloom","Dreissenids","Sample_Year", "Lake_Name")]))

#separate by invaded and uninvaded
I.blooms<-as.data.frame(table(I.CSLAP[,c("Lake_Name","Sample_Year", "Bloom")]))
I.blooms$Dreissenids<-rep("Invaded", 96)

U.blooms<-as.data.frame(table(U.CSLAP[,c("Lake_Name","Sample_Year", "Bloom")]))
U.blooms$Dreissenids<-rep("Uninvaded", 744)

#Make into one dataframe for analysis
blooms<-rbind(I.blooms, U.blooms)

#Remove 'no blooms' 
blooms<-blooms[blooms$Bloom == "Bloom",]

AOV

fit<-aov(Freq ~ Dreissenids, data=blooms)
summary(fit)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Dreissenids   1   15.9  15.901   12.37 0.000485 ***
## Residuals   418  537.4   1.286                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = blooms)
## 
## $Dreissenids
##                         diff        lwr        upr     p adj
## Uninvaded-Invaded -0.6115591 -0.9533921 -0.2697262 0.0004849

GLMER poisson

hist(blooms$Freq)

model<-glmer(Freq ~ Dreissenids + (1|Lake_Name) + (1|Sample_Year:Lake_Name), family=poisson(link=log), data=blooms)
summary(model)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name) + (1 | Sample_Year:Lake_Name)
##    Data: blooms
## 
##      AIC      BIC   logLik deviance df.resid 
##    482.6    498.8   -237.3    474.6      416 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.04168 -0.09715 -0.07750 -0.07750  1.79741 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev.
##  Sample_Year:Lake_Name (Intercept) 12.22    3.496   
##  Lake_Name             (Intercept)  1.51    1.229   
## Number of obs: 420, groups:  Sample_Year:Lake_Name, 408; Lake_Name, 68
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)    
## (Intercept)           -4.4006     1.3009  -3.383 0.000718 ***
## DreissenidsUninvaded  -0.5865     0.7018  -0.836 0.403307    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.652
res<-simulateResiduals(model)
plot(res)

testResiduals(res)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.038304, p-value = 0.5687
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.0084391, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 4.2000e+02, freqH0 =
## 3.9841e-03, p-value = 0.374
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.038304, p-value = 0.5687
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.0084391, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 4.2000e+02, freqH0 =
## 3.9841e-03, p-value = 0.374
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.99977, p-value = 0.96
## alternative hypothesis: two.sided

Reduced Dataset: All Blooms

redHABFreqYear<-as.data.frame(table(redCSLAP[,c("Bloom","Dreissenids","Sample_Year")]))

##Data organization for analysis 

#separate by invaded and uninvaded
I.redblooms<-as.data.frame(table(I.redCSLAP[,c("Lake_Name","Sample_Year","Bloom")]))
I.redblooms$Dreissenids<-rep("Invaded", 96)

U.redblooms<-as.data.frame(table(U.redCSLAP[,c("Lake_Name","Sample_Year","Bloom")]))
U.redblooms$Dreissenids<-rep("Uninvaded", 120)

#Make into one dataframe for analysis
redblooms<-rbind(I.redblooms, U.redblooms)

#Remove 'no blooms' 
redblooms<-redblooms[redblooms$Bloom == "Bloom",]

AOV

redfit<-aov(Freq ~ Dreissenids, data=redblooms)
summary(redfit)
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Dreissenids   1  13.38  13.380   6.767 0.0106 *
## Residuals   106 209.58   1.977                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(redfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = redblooms)
## 
## $Dreissenids
##                         diff       lwr        upr     p adj
## Uninvaded-Invaded -0.7083333 -1.248186 -0.1684803 0.0106128

GLMER poisson

hist(redblooms$Freq)

redmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name) + (1|Sample_Year:Lake_Name), family=poisson(link=log), data=redblooms)
summary(redmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name) + (1 | Sample_Year:Lake_Name)
##    Data: redblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##    163.2    173.9    -77.6    155.2      104 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -0.9165 -0.3324 -0.2376 -0.2098  2.4299 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev.
##  Sample_Year:Lake_Name (Intercept) 1.016    1.008   
##  Lake_Name             (Intercept) 2.024    1.423   
## Number of obs: 108, groups:  Sample_Year:Lake_Name, 96; Lake_Name, 16
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)   
## (Intercept)           -2.1308     0.7507  -2.838  0.00454 **
## DreissenidsUninvaded  -0.3365     0.7764  -0.433  0.66470   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.658
redres<-simulateResiduals(redmodel)
plot(redres)

testResiduals(redres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.083157, p-value = 0.444
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 1.1098, p-value = 0.488
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 1.0800e+02, freqH0 =
## 3.9841e-03, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.083157, p-value = 0.444
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 1.1098, p-value = 0.488
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 1.0800e+02, freqH0 =
## 3.9841e-03, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0262, p-value = 0.856
## alternative hypothesis: two.sided

Global Dataset: HT Blooms

HTBloom<-CSLAP[which(CSLAP$Bloom == "Bloom"), ]

#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
OWHTBloom<-HTBloom[HTBloom$Info_Type == "OW",]
OWHTBloom$HTBloom <- ifelse(OWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")

#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
SBHTBloom<-HTBloom[HTBloom$Info_Type == "SB",]
SBHTBloom$HTBloom <- ifelse(SBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")

#combine OW and SB dfs 
allHTBloom<-rbind(OWHTBloom, SBHTBloom)

#Separate based on invasion status and drop unused levels of Lake Name
I.HTBloom<-allHTBloom[allHTBloom$Dreissenids == "Invaded",]
I.HTBloom$Lake_Name<-droplevels(I.HTBloom$Lake_Name)

U.HTBloom<-allHTBloom[allHTBloom$Dreissenids == "Uninvaded",]
U.HTBloom$Lake_Name<-droplevels(U.HTBloom$Lake_Name)
#separate by invaded and uninvaded
I.HTblooms<-as.data.frame(table(I.HTBloom[,c("Lake_Name","Sample_Year","HTBloom")]))
I.HTblooms$Dreissenids<-rep("Invaded", 40)

U.HTblooms<-as.data.frame(table(U.HTBloom[,c("Lake_Name","Sample_Year","HTBloom")]))
U.HTblooms$Dreissenids<-rep("Uninvaded", 348)

#Make into one dataframe for analysis
HTblooms<-rbind(I.HTblooms, U.HTblooms)

#Remove 'no blooms' 
HTblooms<-HTblooms[HTblooms$HTBloom == "HT Bloom",]

AOV

HTfit<-aov(Freq ~ Dreissenids, data=HTblooms)
summary(HTfit)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Dreissenids   1   6.14    6.14    11.8 0.000726 ***
## Residuals   192  99.90    0.52                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(HTfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = HTblooms)
## 
## $Dreissenids
##                         diff        lwr        upr     p adj
## Uninvaded-Invaded -0.5850575 -0.9209807 -0.2491342 0.0007256

GLMER poisson

hist(HTblooms$Freq)

HTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name) + (1|Sample_Year:Lake_Name), family=poisson(link=log), data=HTblooms)
summary(HTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name) + (1 | Sample_Year:Lake_Name)
##    Data: HTblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##    108.8    121.8    -50.4    100.8      190 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.12433 -0.01921 -0.01921 -0.01921  1.94337 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev.
##  Sample_Year:Lake_Name (Intercept)  0.4006  0.6329  
##  Lake_Name             (Intercept) 27.6627  5.2595  
## Number of obs: 194, groups:  Sample_Year:Lake_Name, 189; Lake_Name, 32
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -4.737      3.536  -1.340    0.180
## DreissenidsUninvaded   -3.107      2.977  -1.044    0.297
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.793
HTres<-simulateResiduals(HTmodel)
plot(HTres)

testResiduals(HTres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.07558, p-value = 0.2177
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.00091238, p-value = 0.184
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 1.9400e+02, freqH0 =
## 3.9841e-03, p-value = 0.9219
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.07558, p-value = 0.2177
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.00091238, p-value = 0.184
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000e+00, outHigh = 0.0000e+00, nobs = 1.9400e+02, freqH0 =
## 3.9841e-03, p-value = 0.9219
## alternative hypothesis: two.sided
testZeroInflation(HTres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 1.0384, p-value = 0.488
## alternative hypothesis: two.sided

Reduced Dataset: HT Blooms

redHTBloom<-redCSLAP[which(redCSLAP$Bloom == "Bloom"), ]

#create df of only OW blooms, then use 10 ug/L microcystin to get HT blooms (DEC open water guideline)
redOWHTBloom<-redHTBloom[redHTBloom$Info_Type == "OW",]
redOWHTBloom$HTBloom <- ifelse(redOWHTBloom$ESF_Microcystin >= 10.0, "HT Bloom", "No HT Bloom")

#create df of only SB blooms, then use 20 ug/L microcystin to get HT blooms (DEC shoreline guideline)
redSBHTBloom<-redHTBloom[redHTBloom$Info_Type == "SB",]
redSBHTBloom$HTBloom <- ifelse(redSBHTBloom$ESF_Microcystin >= 20.0, "HT Bloom", "No HT Bloom")

#combine OW and SB dfs
redallHTBloom<-rbind(redOWHTBloom, redSBHTBloom)

#Separate based on invasion status and drop unused levels of Lake Name
redI.HTBloom<-redallHTBloom[redallHTBloom$Dreissenids == "Invaded",]
redI.HTBloom$Lake_Name<-droplevels(redI.HTBloom$Lake_Name)

redU.HTBloom<-redallHTBloom[redallHTBloom$Dreissenids == "Uninvaded",]
redU.HTBloom$Lake_Name<-droplevels(redU.HTBloom$Lake_Name)
#separate by invaded and uninvaded
redI.HTblooms<-as.data.frame(table(redI.HTBloom[,c("Lake_Name","Sample_Year", "HTBloom")]))
redI.HTblooms$Dreissenids<-rep("Invaded", 40)

redU.HTblooms<-as.data.frame(table(redU.HTBloom[,c("Lake_Name","Sample_Year", "HTBloom")]))
redU.HTblooms$Dreissenids<-rep("Uninvaded", 35)

#Make into one dataframe for analysis
redHTblooms<-rbind(redI.HTblooms, redU.HTblooms)

#Remove 'no blooms' 
redHTblooms<-redHTblooms[redHTblooms$HTBloom == "HT Bloom",]

AOV

redHTfit<-aov(Freq ~ Dreissenids, data=redHTblooms)
summary(redHTfit)
##             Df Sum Sq Mean Sq F value  Pr(>F)   
## Dreissenids  1   5.74   5.738   7.763 0.00738 **
## Residuals   53  39.17   0.739                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(redHTfit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Freq ~ Dreissenids, data = redHTblooms)
## 
## $Dreissenids
##                         diff       lwr       upr     p adj
## Uninvaded-Invaded -0.6714286 -1.154771 -0.188086 0.0073825

GLMER poisson

hist(redHTblooms$Freq)

redHTmodel<-glmer(Freq ~ Dreissenids + (1|Lake_Name) + (1|Sample_Year:Lake_Name), family=poisson(link=log), data=redHTblooms)
summary(redHTmodel)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: Freq ~ Dreissenids + (1 | Lake_Name) + (1 | Sample_Year:Lake_Name)
##    Data: redHTblooms
## 
##      AIC      BIC   logLik deviance df.resid 
##     51.2     59.3    -21.6     43.2       51 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -0.72112 -0.22040 -0.08331 -0.08331  3.01644 
## 
## Random effects:
##  Groups                Name        Variance Std.Dev.
##  Sample_Year:Lake_Name (Intercept) 0.1473   0.3838  
##  Lake_Name             (Intercept) 3.8701   1.9672  
## Number of obs: 55, groups:  Sample_Year:Lake_Name, 50; Lake_Name, 10
## 
## Fixed effects:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)            -2.077      1.412  -1.471    0.141
## DreissenidsUninvaded   -2.758      1.834  -1.504    0.133
## 
## Correlation of Fixed Effects:
##             (Intr)
## DrssndsUnnv -0.469
redHTres<-simulateResiduals(redHTmodel)
plot(redHTres)

testResiduals(redHTres)

## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.12652, p-value = 0.315
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.86448, p-value = 0.608
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 55.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
## $uniformity
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  simulationOutput$scaledResiduals
## D = 0.12652, p-value = 0.315
## alternative hypothesis: two-sided
## 
## 
## $dispersion
## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## ratioObsSim = 0.86448, p-value = 0.608
## alternative hypothesis: two.sided
## 
## 
## $outliers
## 
##  DHARMa outlier test based on exact binomial test
## 
## data:  simulationOutput
## outLow = 0.0000000, outHigh = 0.0000000, nobs = 55.0000000, freqH0 =
## 0.0039841, p-value = 1
## alternative hypothesis: two.sided
testZeroInflation(redHTres)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0.99635, p-value = 1
## alternative hypothesis: two.sided

Linear Regressions

Global Dataset

#Extracting average annual TP for each lake 
library(plyr)
avgTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TP, na.rm=TRUE))

#Extracting average annual TN:TP for each lake 
avgTNTP<-ddply(OWCSLAP, c("LakeID", "Sample_Year"), summarize, 
             Mean = mean(TN_TP, na.rm=TRUE))

#Taking only rows where there is a value for Microcystin 
Bloom<-CSLAP[!is.na(CSLAP$ESF_Microcystin),]

#Which rows are missing TP values? 
noTPbloom<-Bloom[is.na(Bloom$TP),]

#Which rows are missing TN:TP values? 
noTNTPbloom<-Bloom[is.na(Bloom$TN_TP),]

#merge average TP to noTPbloom data frame 
avgTPbloom<-merge(noTPbloom, avgTP, by=c("LakeID", "Sample_Year"), all.x=TRUE)

#merge average TN:TP to noTNTPbloom data frame 
avgTNTPbloom<-merge(noTNTPbloom, avgTNTP, by=c("LakeID", "Sample_Year"), all.x = TRUE)

#remove original TP column, and re-name the average TP column 
avgTPbloom<-avgTPbloom[, c(1:16,18:56)]
colnames(avgTPbloom)[colnames(avgTPbloom)=="Mean"] <- "TP"

#remove original TN:TP column, and re-name the average TN:TP column 
avgTNTPbloom<-avgTNTPbloom[, c(1:49, 51:56)]
colnames(avgTNTPbloom)[colnames(avgTNTPbloom)=="Mean"] <- "TN_TP"

#re-order columns 
avgTPbloom<-avgTPbloom[, c(1:16,55,17:54)]

#re-order columns 
avgTNTPbloom<-avgTNTPbloom[, c(1:49,55,50:54)]

#remove TN_TP from avgTPbloom and merge with avgTNTP bloom 
avgTPbloom<-avgTPbloom[,c(1:49,51:55)]
TNTPmerge<-avgTNTPbloom[,c(5,50)]
avgTPTNTPbloom<-merge(avgTPbloom, TNTPmerge, by="Sample_Name")

#re-order columns
avgTPTNTPbloom<-avgTPTNTPbloom[,c(1:49,55,50:54)]

#combine two data frames
hadTP<-Bloom[!is.na(Bloom$TP),]

completeBloom<-rbind(hadTP, avgTPTNTPbloom)


#Split by Info_Type
OWcompleteBloom<-completeBloom[completeBloom$Info_Type == "OW",]
SBcompleteBloom<-completeBloom[completeBloom$Info_Type == "SB",]

#split by invasion status 
OWinvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Invaded",]
OWuninvadedBlooms<-OWcompleteBloom[OWcompleteBloom$Dreissenids == "Uninvaded",]

SBinvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Invaded",]
SBuninvadedBlooms<-SBcompleteBloom[SBcompleteBloom$Dreissenids == "Uninvaded",]

TP and Microcystin

Open Water

#create linear models 
OWinvaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TP+.1))
OWuninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TP+.1))

summary(OWinvaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48252 -0.17973  0.04444  0.11749  0.78628 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     8.6021     2.0963   4.103  0.00175 **
## log(OWinvadedBlooms$TP + 0.1)   4.2365     0.9805   4.321  0.00121 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3225 on 11 degrees of freedom
## Multiple R-squared:  0.6293, Adjusted R-squared:  0.5956 
## F-statistic: 18.67 on 1 and 11 DF,  p-value: 0.001213
summary(OWuninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8498 -0.3228 -0.1828  0.0744  4.3311 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                       -4.072      3.407  -1.195    0.235
## log(OWuninvadedBlooms$TP + 0.1)   -1.642      1.557  -1.054    0.295
## 
## Residual standard error: 0.6902 on 87 degrees of freedom
## Multiple R-squared:  0.01261,    Adjusted R-squared:  0.001258 
## F-statistic: 1.111 on 1 and 87 DF,  p-value: 0.2948
OWinvaded.lm.sum<-summary(OWinvaded.lm)
OWuninvaded.lm.sum<-summary(OWuninvaded.lm)

#extract R2 and p-value 
r2<-OWinvaded.lm.sum$adj.r.squared
my.p<-OWinvaded.lm.sum$coefficients[2,4]

rp<-vector("expression", 2)
rp[1] <- substitute(expression(italic(R)^2 == MYVALUE), 
        list(MYVALUE = format(r2,dig=3)))[2]
rp[2] = substitute(expression(italic(p) == MYOTHERVALUE), 
        list(MYOTHERVALUE = format(my.p, digits = 2)))[2]
ggplot(OWcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  geom_smooth(data=subset(OWcompleteBloom, Dreissenids == "Invaded"),
              aes(x=TP, y=ESF_Microcystin, color=Dreissenids), method="lm", se=FALSE)+
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1, 10, 100, 10000)) +
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text = element_text(size=12))+
  ylab("log open water microcystin (\u03bcg/L)")+
  xlab("log TP (mg/L)")+
  annotate(geom="text", x=.0075, y=10, label= "log microcystin = 8.6 + 4.24( log TP)")+
  annotate(geom="text", x=.005, y=7, label=rp[1], parse=TRUE)+
  annotate(geom="text", x=.005, y=5, label=rp[2], parse=TRUE)
## `geom_smooth()` using formula 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

ggsave("./output_figures/Fig 2-1 Global OW.png")
## Saving 7 x 5 in image
## `geom_smooth()` using formula 'y ~ x'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'

Shoreline Bloom

#create linear models 
SBinvaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TP+.1))
SBuninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TP+.1))

summary(SBinvaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2455 -0.7569  0.0850  0.8098  2.3340 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     19.834     16.000   1.240    0.232
## log(SBinvadedBlooms$TP + 0.1)    7.589      7.386   1.027    0.319
## 
## Residual standard error: 1.431 on 17 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.05847,    Adjusted R-squared:  0.003082 
## F-statistic: 1.056 on 1 and 17 DF,  p-value: 0.3186
summary(SBuninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1845 -1.8375 -0.3783  1.8765  5.8238 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        34.79      35.71   0.974    0.340
## log(SBuninvadedBlooms$TP + 0.1)    13.95      16.52   0.845    0.407
## 
## Residual standard error: 2.653 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0314, Adjusted R-squared:  -0.01262 
## F-statistic: 0.7133 on 1 and 22 DF,  p-value: 0.4074
SBinvaded.lm.sum<-summary(SBinvaded.lm)
SBuninvaded.lm.sum<-summary(SBuninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text = element_text(size=12))+
  ylab("log shoreline bloom microcystin (\u03bcg/L)")+
  xlab("log TP (mg/L)")
## Warning: Removed 3 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-1 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).

TN:TP and Microcystin

Open Water

#create linear models 
OWTNTP_invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$TN_TP+.1))
OWTNTP_uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$TN_TP+.1))

summary(OWTNTP_invaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37473 -0.23835 -0.22292 -0.06378  1.11484 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                        0.5400     0.7164   0.754    0.467
## log(OWinvadedBlooms$TN_TP + 0.1)  -0.2916     0.2077  -1.404    0.188
## 
## Residual standard error: 0.4878 on 11 degrees of freedom
## Multiple R-squared:  0.152,  Adjusted R-squared:  0.07489 
## F-statistic: 1.971 on 1 and 11 DF,  p-value: 0.1879
summary(OWTNTP_uninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8382 -0.2960 -0.2138  0.0185  4.3962 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                         -0.8216     0.5559  -1.478    0.143
## log(OWuninvadedBlooms$TN_TP + 0.1)   0.1014     0.1626   0.624    0.534
## 
## Residual standard error: 0.6962 on 86 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.004504,   Adjusted R-squared:  -0.007072 
## F-statistic: 0.3891 on 1 and 86 DF,  p-value: 0.5344
OWTNTP_invaded.lm.sum<-summary(OWTNTP_invaded.lm)
OWTNTP_uninvaded.lm.sum<-summary(OWTNTP_uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) +
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1, 10,100, 10000)) +
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text=element_text(size=12))+
  ylab("log open water microcystin (\u03bcg/L)")+
  xlab("log TN:TP")
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-3 Global OW.png")
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).

Shoreline Bloom

#create linear models 
SBTNTP_invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$TN_TP+.1))
SBTNTP_uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$TN_TP+.1))

summary(SBTNTP_invaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5667 -0.6502  0.1916  0.7317  2.4407 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                        5.8364     2.3907   2.441   0.0259 *
## log(SBinvadedBlooms$TN_TP + 0.1)  -0.5496     0.5338  -1.030   0.3176  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.431 on 17 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.0587, Adjusted R-squared:  0.003334 
## F-statistic:  1.06 on 1 and 17 DF,  p-value: 0.3176
summary(SBTNTP_uninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$TN_TP + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7351 -1.2655 -0.2051  1.7642  5.8474 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)
## (Intercept)                          14.229      8.627   1.649    0.113
## log(SBuninvadedBlooms$TN_TP + 0.1)   -2.817      2.529  -1.114    0.277
## 
## Residual standard error: 2.622 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.05339,    Adjusted R-squared:  0.01036 
## F-statistic: 1.241 on 1 and 22 DF,  p-value: 0.2774
SBTNTP_invaded.lm.sum<-summary(SBTNTP_invaded.lm)
SBTNTP_uninvaded.lm.sum<-summary(SBTNTP_uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=TN_TP, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) +
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10', breaks=c(1,100, 10000)) +
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text=element_text(size=12))+
  ylab("log shoreline bloom microcystin (\u03bcg/L)")+
  xlab("log TN:TP")
## Warning: Removed 3 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-3 Global SB.png")
## Saving 7 x 5 in image
## Warning: Removed 3 rows containing missing values (geom_point).

Chlorophyll-a and Microcystin

Open Water

#create linear models
OWchl.invaded.lm<-lm(log(OWinvadedBlooms$ESF_Microcystin+.1)~log(OWinvadedBlooms$Extracted_Chl.a+.1))
OWchl.uninvaded.lm<-lm(log(OWuninvadedBlooms$ESF_Microcystin+.1)~log(OWuninvadedBlooms$Extracted_Chl.a+.1))

summary(OWchl.invaded.lm)
## 
## Call:
## lm(formula = log(OWinvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWinvadedBlooms$Extracted_Chl.a + 
##     0.1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.38713 -0.32568 -0.15193  0.03177  1.17554 
## 
## Coefficients:
##                                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 -0.3746     0.1675  -2.237   0.0469
## log(OWinvadedBlooms$Extracted_Chl.a + 0.1)  -0.1446     0.1739  -0.832   0.4234
##                                             
## (Intercept)                                *
## log(OWinvadedBlooms$Extracted_Chl.a + 0.1)  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5138 on 11 degrees of freedom
## Multiple R-squared:  0.05914,    Adjusted R-squared:  -0.02639 
## F-statistic: 0.6914 on 1 and 11 DF,  p-value: 0.4234
summary(OWchl.uninvaded.lm)
## 
## Call:
## lm(formula = log(OWuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(OWuninvadedBlooms$Extracted_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8337 -0.3284 -0.1780  0.0239  4.3742 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  -0.38759    0.12036  -3.220
## log(OWuninvadedBlooms$Extracted_Chl.a + 0.1) -0.09242    0.09603  -0.962
##                                              Pr(>|t|)   
## (Intercept)                                   0.00181 **
## log(OWuninvadedBlooms$Extracted_Chl.a + 0.1)  0.33853   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6945 on 86 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.01066,    Adjusted R-squared:  -0.0008479 
## F-statistic: 0.9263 on 1 and 86 DF,  p-value: 0.3385
OWchl.invaded.lm.sum<-summary(OWchl.invaded.lm)
OWchl.uninvaded.lm.sum<-summary(OWchl.uninvaded.lm)
ggplot(OWcompleteBloom, aes(x=Extracted_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) + 
  theme_classic()+ 
  scale_x_continuous(trans='log10') +
  scale_y_continuous(trans='log10') + 
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text=element_text(size=12))+
  ylab("log open water microcystin (\u03bcg/L)")+
  xlab("log extracted chlorophyll-a (\u03bcg/L)")
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave("./output_figures/Fig 2-2 Global OW.png")
## Saving 7 x 5 in image
## Warning: Removed 1 rows containing missing values (geom_point).

Shoreline Bloom

#create linear models
SBchl.invaded.lm<-lm(log(SBinvadedBlooms$ESF_Microcystin+.1)~log(SBinvadedBlooms$ESF_FP_Chl.a+.1))
SBchl.uninvaded.lm<-lm(log(SBuninvadedBlooms$ESF_Microcystin+.1)~log(SBuninvadedBlooms$ESF_FP_Chl.a+.1))

summary(SBchl.invaded.lm)
## 
## Call:
## lm(formula = log(SBinvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBinvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2713 -0.9313  0.2731  1.0336  2.3892 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                               2.7787     1.4482   1.919    0.071 .
## log(SBinvadedBlooms$ESF_FP_Chl.a + 0.1)   0.1318     0.2493   0.529    0.603  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.531 on 18 degrees of freedom
## Multiple R-squared:  0.01529,    Adjusted R-squared:  -0.03941 
## F-statistic: 0.2795 on 1 and 18 DF,  p-value: 0.6035
summary(SBchl.uninvaded.lm)
## 
## Call:
## lm(formula = log(SBuninvadedBlooms$ESF_Microcystin + 0.1) ~ log(SBuninvadedBlooms$ESF_FP_Chl.a + 
##     0.1))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2030 -1.9212 -0.1592  1.7338  5.5123 
## 
## Coefficients:
##                                           Estimate Std. Error t value Pr(>|t|)
## (Intercept)                                 1.1730     2.4462   0.480    0.636
## log(SBuninvadedBlooms$ESF_FP_Chl.a + 0.1)   0.4346     0.2928   1.484    0.151
## 
## Residual standard error: 2.498 on 24 degrees of freedom
## Multiple R-squared:  0.08409,    Adjusted R-squared:  0.04593 
## F-statistic: 2.204 on 1 and 24 DF,  p-value: 0.1507
SBchl.invaded.lm.sum<-summary(SBchl.invaded.lm)
SBchl.uninvaded.lm.sum<-summary(SBchl.uninvaded.lm)
ggplot(SBcompleteBloom, aes(x=ESF_FP_Chl.a, y=ESF_Microcystin, color=Dreissenids, shape=Dreissenids)) +
  geom_point(cex=2) +
  theme_classic()+ 
  scale_x_continuous(trans='log10', breaks=c(10,100,1000,10000)) +
  scale_y_continuous(trans='log10') + 
  scale_color_grey()+
  theme(axis.text.y = element_text(size=12), axis.text.x = element_text(size=12), axis.title.y = element_text(size=14), 
        axis.title.x = element_text(size=14), legend.position = "top", legend.title = element_blank(), legend.text=element_text(size=12))+
  ylab("log shoreline bloom microcystin (\u03bcg/L)")+
  xlab("log fluoroprobe chlorophyll-a (\u03bcg/L)")

ggsave("./output_figures/Fig 2-2 Global SB.png")
## Saving 7 x 5 in image